From 13c5e7a3e474beeb4de1eba1e5587f4aa31ba92e Mon Sep 17 00:00:00 2001
From: Julia Jiang <julia.jiang@amd.com>
Date: Wed, 10 Jul 2024 16:06:00 -0400
Subject: [PATCH] SWDEV-472723 - Correct file format and remove trailing spaces

Change-Id: Ie40c763e9391fa36d6c890cd0a171659a1502a83


[ROCm/hip-tests commit: 5d042c80fa3d79b62cc25ad0bedc2813e99571ff]
---
 projects/hip-tests/.gitattributes             |   20 +
 projects/hip-tests/catch/README.md            |    2 +-
 .../catch/unit/deviceLib/Atomic_func.cc       |  238 +-
 .../deviceLib/DoublePrecisionIntrinsics.cc    |  162 +-
 .../deviceLib/DoublePrecisionMathDevice.cc    |  266 +-
 .../unit/deviceLib/DoublePrecisionMathHost.cc |  234 +-
 .../catch/unit/deviceLib/FloatMathPrecise.cc  |  256 +-
 .../catch/unit/deviceLib/IntegerIntrinsics.cc |  136 +-
 .../catch/unit/deviceLib/SimpleAtomicsTest.cc |  596 +-
 .../deviceLib/SinglePrecisionIntrinsics.cc    |  202 +-
 .../deviceLib/SinglePrecisionMathDevice.cc    |  246 +-
 .../unit/deviceLib/SinglePrecisionMathHost.cc |  226 +-
 .../catch/unit/deviceLib/hipStdComplex.cc     |  306 +-
 .../catch/unit/deviceLib/hipTestAtomicAdd.cc  |  440 +-
 .../catch/unit/deviceLib/hipTestClock.cc      |  102 +-
 .../unit/errorHandling/hipDrvGetErrorName.cc  |  176 +-
 .../errorHandling/hipDrvGetErrorString.cc     |  176 +-
 .../hip-tests/catch/unit/g++/CMakeLists.txt   |   38 +-
 .../hip-tests/catch/unit/g++/hipMalloc.cc     |  108 +-
 projects/hip-tests/catch/unit/g++/hipMalloc.h |   42 +-
 .../hip-tests/catch/unit/gcc/CMakeLists.txt   |   56 +-
 projects/hip-tests/catch/unit/gcc/gccTest.cc  |  128 +-
 .../catch/unit/kernel/hipDynamicShared.cc     |  352 +-
 .../catch/unit/kernel/hipDynamicShared2.cc    |  188 +-
 .../catch/unit/kernel/hipEmptyKernel.cc       |  118 +-
 .../unit/kernel/hipExtLaunchKernelGGL.cc      |  276 +-
 .../catch/unit/kernel/hipGridLaunch.cc        |  244 +-
 .../unit/kernel/hipLanguageExtensions.cc      |  222 +-
 .../catch/unit/kernel/hipLaunchParm.cc        | 2038 ++---
 .../catch/unit/kernel/hipLaunchParmFunctor.cc |  928 +--
 .../hipSVMTestSharedAddressSpaceFineGrain.cpp |    2 +-
 .../hip-tests/catch/unit/p2p/CMakeLists.txt   |   48 +-
 .../unit/p2p/hipP2pLinkTypeAndHopFunc.cc      |  712 +-
 .../catch/unit/p2p/hipP2pLinkTypeAndHopFunc.h |  220 +-
 .../hip-tests/catch/unit/rtc/RtcFunctions.cpp | 6600 ++++++++---------
 .../hip-tests/catch/unit/rtc/RtcUtility.cpp   | 1016 +--
 .../catch/unit/rtc/headers/RtcFunctions.h     |  356 +-
 .../catch/unit/rtc/headers/RtcKernels.h       |  326 +-
 .../catch/unit/rtc/headers/RtcUtility.h       |  106 +-
 .../catch/unit/synchronization/CMakeLists.txt |   50 +-
 .../cache_coherency_cpu_gpu.cc                |  564 +-
 .../cache_coherency_gpu_gpu.cc                |  588 +-
 .../unit/synchronization/copy_coherency.cc    |  680 +-
 .../hip-tests/catch/unit/warp/hipShflTests.cc |  364 +-
 .../catch/unit/warp/hipShflUpDownTest.cc      |  482 +-
 .../perftests/memory/hipPerfMemset.cpp        |  874 +--
 projects/hip-tests/samples/README.md          |    2 +-
 47 files changed, 10766 insertions(+), 10746 deletions(-)
 create mode 100644 projects/hip-tests/.gitattributes

diff --git a/projects/hip-tests/.gitattributes b/projects/hip-tests/.gitattributes
new file mode 100644
index 0000000000..d5175f2f9c
--- /dev/null
+++ b/projects/hip-tests/.gitattributes
@@ -0,0 +1,20 @@
+# Set the default behavior, in case people don't have core.autolf set.
+* text=auto
+
+# Explicitly declare text files you want to always be normalized and converted
+# to have LF line endings on checkout.
+*.c text eol=lf
+*.cpp text eol=lf
+*.cc text eol=lf
+*.h text eol=lf
+*.hpp text eol=lf
+*.txt text eol=lf
+
+# Define files to support auto-remove trailing white space
+# Need to run the command below, before add modified file(s) to the staging area
+# git config filter.trimspace.clean 'sed -e "s/[[:space:]]*$//g"'
+*.cpp filter=trimspace
+*.c filter=trimspace
+*.h filter=trimspacecpp
+*.hpp filter=trimspace
+*.md filter=trimspace
\ No newline at end of file
diff --git a/projects/hip-tests/catch/README.md b/projects/hip-tests/catch/README.md
index 22a4218766..3a7dec6e66 100644
--- a/projects/hip-tests/catch/README.md
+++ b/projects/hip-tests/catch/README.md
@@ -180,7 +180,7 @@ hipcc <path_to_test.cpp> -I<HIP_SRC_DIR>/tests/catch/include <HIP_SRC_DIR>/tests
 ## Debugging support
 Catch2 allows multiple ways in which you can debug the test case.
 - `-b` options breaks into a debugger as soon as there is a failure encountered [Catch2 Options Reference](https://github.com/catchorg/Catch2/blob/devel/docs/command-line.md#breaking-into-the-debugger)
-- Catch2 provided [logging macro](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/logging.md#top) that print useful information on test case failure 
+- Catch2 provided [logging macro](https://github.com/catchorg/Catch2/blob/v2.13.6/docs/logging.md#top) that print useful information on test case failure
 - User can also call [CATCH_BREAK_INTO_DEBUGGER](https://github.com/catchorg/Catch2/blob/devel/docs/configuration.md#overriding-catchs-debug-break--b) macro to break at a certain point in a test case.
 - User can also mention filename.cc:__LineNumber__ to break into a test case via gdb.
 
diff --git a/projects/hip-tests/catch/unit/deviceLib/Atomic_func.cc b/projects/hip-tests/catch/unit/deviceLib/Atomic_func.cc
index 4a28839c6f..9a16e82d49 100644
--- a/projects/hip-tests/catch/unit/deviceLib/Atomic_func.cc
+++ b/projects/hip-tests/catch/unit/deviceLib/Atomic_func.cc
@@ -1,119 +1,119 @@
-/*
-Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_common.hh>
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-
-// Test case to validate atomicInc and atomicDec functions.
-// if TestToRun=1, then atomicInc function will be tested and validated
-// if TestToRun=2, then atomicDec function will be tested and validated.
-
-
-// kernel function for atomicInc
-static __global__ void AtomicCheckInc(int* g_ptr) {
-  atomicInc(reinterpret_cast<unsigned int*>(&g_ptr[0]), 17);
-}
-
-// kernel function for atomicDec
-static __global__ void AtomicCheckDec(int* g_ptr) {
-  atomicDec(reinterpret_cast<unsigned int*>(&g_ptr[0]), 25);
-}
-
-// verify results for atomicInc
-static int verifyResultInc(int value) {
-  int limit = 17;
-  value = (value >= limit) ? 0 : value + 1;
-  return value;
-}
-
-// verify results for atomicDec
-static int verifyResultDec(int value) {
-  int limit = 25;
-  value = ((value == 0) || (value > limit)) ? limit : value - 1;
-  return value;
-}
-
-// common fuction to launch atomic functions kernel.
-static void launchAtomicFunction(int *Hptr, int val, int TestToRun) {
-  unsigned int memSize = sizeof(int) * 1;
-  int *dptr{nullptr};
-  // allocate device memory
-  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dptr), memSize));
-  // copy host memory to device
-  HIP_CHECK(hipMemcpy(dptr, Hptr, memSize, hipMemcpyHostToDevice));
-  // launch kernel function
-  if (TestToRun == 1) {
-    AtomicCheckInc<<<1, 1>>>(dptr);
-  } else if (TestToRun == 2) {
-    AtomicCheckDec<<<1, 1>>>(dptr);
-  }
-  // copy back from device to host
-  HIP_CHECK(hipMemcpy(Hptr, dptr, memSize, hipMemcpyDeviceToHost));
-  // verify the results.
-  if (TestToRun == 1) {
-    int result = verifyResultInc(val);
-    REQUIRE(result == Hptr[0]);
-  } else if (TestToRun == 2) {
-    int result = verifyResultDec(val);
-    REQUIRE(result == Hptr[0]);
-  }
-  // Cleanup memory
-  HIP_CHECK(hipFree(dptr));
-}
-
-TEST_CASE("Unit_AtomicFunctions_Inc") {
-  int *Hptr{nullptr};
-  int val;
-  // Allocate Host memory
-  Hptr = reinterpret_cast<int*>(malloc(sizeof(int)));
-  SECTION("Test case when value is lesser than limit") {
-    val = Hptr[0] = 10;
-    launchAtomicFunction(Hptr, val, 1);
-  }
-  SECTION("Test case when value is greater than limit") {
-    val = Hptr[0] = 20;
-    launchAtomicFunction(Hptr, val, 1);
-  }
-  SECTION("Test case when value is equal to the limit") {
-    val = Hptr[0] = 17;
-    launchAtomicFunction(Hptr, val, 1);
-  }
-  free(Hptr);
-}
-
-TEST_CASE("Unit_AtomicFunctions_Dec") {
-  int *Hptr{nullptr};
-  int val;
-  // Allocate Host memory
-  Hptr = reinterpret_cast<int*>(malloc(sizeof(int)));
-  SECTION("Test case when value is less than limit") {
-    val = Hptr[0] = 4;
-    launchAtomicFunction(Hptr, val, 2);
-  }
-  SECTION("Test case when value is greater than limit") {
-    val = Hptr[0] = 31;
-    launchAtomicFunction(Hptr, val, 2);
-  }
-  SECTION("Test case when value is equal to the limit") {
-    val = Hptr[0] = 25;
-    launchAtomicFunction(Hptr, val, 2);
-  }
-  free(Hptr);
-}
+/*
+Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+
+// Test case to validate atomicInc and atomicDec functions.
+// if TestToRun=1, then atomicInc function will be tested and validated
+// if TestToRun=2, then atomicDec function will be tested and validated.
+
+
+// kernel function for atomicInc
+static __global__ void AtomicCheckInc(int* g_ptr) {
+  atomicInc(reinterpret_cast<unsigned int*>(&g_ptr[0]), 17);
+}
+
+// kernel function for atomicDec
+static __global__ void AtomicCheckDec(int* g_ptr) {
+  atomicDec(reinterpret_cast<unsigned int*>(&g_ptr[0]), 25);
+}
+
+// verify results for atomicInc
+static int verifyResultInc(int value) {
+  int limit = 17;
+  value = (value >= limit) ? 0 : value + 1;
+  return value;
+}
+
+// verify results for atomicDec
+static int verifyResultDec(int value) {
+  int limit = 25;
+  value = ((value == 0) || (value > limit)) ? limit : value - 1;
+  return value;
+}
+
+// common fuction to launch atomic functions kernel.
+static void launchAtomicFunction(int *Hptr, int val, int TestToRun) {
+  unsigned int memSize = sizeof(int) * 1;
+  int *dptr{nullptr};
+  // allocate device memory
+  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dptr), memSize));
+  // copy host memory to device
+  HIP_CHECK(hipMemcpy(dptr, Hptr, memSize, hipMemcpyHostToDevice));
+  // launch kernel function
+  if (TestToRun == 1) {
+    AtomicCheckInc<<<1, 1>>>(dptr);
+  } else if (TestToRun == 2) {
+    AtomicCheckDec<<<1, 1>>>(dptr);
+  }
+  // copy back from device to host
+  HIP_CHECK(hipMemcpy(Hptr, dptr, memSize, hipMemcpyDeviceToHost));
+  // verify the results.
+  if (TestToRun == 1) {
+    int result = verifyResultInc(val);
+    REQUIRE(result == Hptr[0]);
+  } else if (TestToRun == 2) {
+    int result = verifyResultDec(val);
+    REQUIRE(result == Hptr[0]);
+  }
+  // Cleanup memory
+  HIP_CHECK(hipFree(dptr));
+}
+
+TEST_CASE("Unit_AtomicFunctions_Inc") {
+  int *Hptr{nullptr};
+  int val;
+  // Allocate Host memory
+  Hptr = reinterpret_cast<int*>(malloc(sizeof(int)));
+  SECTION("Test case when value is lesser than limit") {
+    val = Hptr[0] = 10;
+    launchAtomicFunction(Hptr, val, 1);
+  }
+  SECTION("Test case when value is greater than limit") {
+    val = Hptr[0] = 20;
+    launchAtomicFunction(Hptr, val, 1);
+  }
+  SECTION("Test case when value is equal to the limit") {
+    val = Hptr[0] = 17;
+    launchAtomicFunction(Hptr, val, 1);
+  }
+  free(Hptr);
+}
+
+TEST_CASE("Unit_AtomicFunctions_Dec") {
+  int *Hptr{nullptr};
+  int val;
+  // Allocate Host memory
+  Hptr = reinterpret_cast<int*>(malloc(sizeof(int)));
+  SECTION("Test case when value is less than limit") {
+    val = Hptr[0] = 4;
+    launchAtomicFunction(Hptr, val, 2);
+  }
+  SECTION("Test case when value is greater than limit") {
+    val = Hptr[0] = 31;
+    launchAtomicFunction(Hptr, val, 2);
+  }
+  SECTION("Test case when value is equal to the limit") {
+    val = Hptr[0] = 25;
+    launchAtomicFunction(Hptr, val, 2);
+  }
+  free(Hptr);
+}
diff --git a/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionIntrinsics.cc b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionIntrinsics.cc
index 6801decb9e..207e105d2c 100644
--- a/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionIntrinsics.cc
+++ b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionIntrinsics.cc
@@ -1,81 +1,81 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
-
-#pragma GCC diagnostic ignored "-Wall"
-#pragma clang diagnostic ignored "-Wunused-variable"
-
-__device__ void double_precision_intrinsics() {
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __dadd_rd(0.0, 1.0);
-#endif
-    __dadd_rn(0.0, 1.0);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __dadd_ru(0.0, 1.0);
-    __dadd_rz(0.0, 1.0);
-    __ddiv_rd(0.0, 1.0);
-#endif
-    __ddiv_rn(0.0, 1.0);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __ddiv_ru(0.0, 1.0);
-    __ddiv_rz(0.0, 1.0);
-    __dmul_rd(1.0, 2.0);
-#endif
-    __dmul_rn(1.0, 2.0);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __dmul_ru(1.0, 2.0);
-    __dmul_rz(1.0, 2.0);
-    __drcp_rd(2.0);
-#endif
-    __drcp_rn(2.0);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __drcp_ru(2.0);
-    __drcp_rz(2.0);
-    __dsqrt_rd(4.0);
-#endif
-    __dsqrt_rn(4.0);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __dsqrt_ru(4.0);
-    __dsqrt_rz(4.0);
-    __dsub_rd(2.0, 1.0);
-#endif
-    __dsub_rn(2.0, 1.0);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __dsub_ru(2.0, 1.0);
-    __dsub_rz(2.0, 1.0);
-    __fma_rd(1.0, 2.0, 3.0);
-#endif
-    __fma_rn(1.0, 2.0, 3.0);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __fma_ru(1.0, 2.0, 3.0);
-    __fma_rz(1.0, 2.0, 3.0);
-#endif
-}
-
-__global__ void compileDoublePrecisionIntrinsics(int) {
-    double_precision_intrinsics();
-}
-
-TEST_CASE("Unit_DoublePrecisionIntrinsics") {
-  hipLaunchKernelGGL(compileDoublePrecisionIntrinsics, dim3(1, 1, 1),
-                                             dim3(1, 1, 1), 0, 0, 1);
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+
+#pragma GCC diagnostic ignored "-Wall"
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+__device__ void double_precision_intrinsics() {
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __dadd_rd(0.0, 1.0);
+#endif
+    __dadd_rn(0.0, 1.0);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __dadd_ru(0.0, 1.0);
+    __dadd_rz(0.0, 1.0);
+    __ddiv_rd(0.0, 1.0);
+#endif
+    __ddiv_rn(0.0, 1.0);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __ddiv_ru(0.0, 1.0);
+    __ddiv_rz(0.0, 1.0);
+    __dmul_rd(1.0, 2.0);
+#endif
+    __dmul_rn(1.0, 2.0);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __dmul_ru(1.0, 2.0);
+    __dmul_rz(1.0, 2.0);
+    __drcp_rd(2.0);
+#endif
+    __drcp_rn(2.0);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __drcp_ru(2.0);
+    __drcp_rz(2.0);
+    __dsqrt_rd(4.0);
+#endif
+    __dsqrt_rn(4.0);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __dsqrt_ru(4.0);
+    __dsqrt_rz(4.0);
+    __dsub_rd(2.0, 1.0);
+#endif
+    __dsub_rn(2.0, 1.0);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __dsub_ru(2.0, 1.0);
+    __dsub_rz(2.0, 1.0);
+    __fma_rd(1.0, 2.0, 3.0);
+#endif
+    __fma_rn(1.0, 2.0, 3.0);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __fma_ru(1.0, 2.0, 3.0);
+    __fma_rz(1.0, 2.0, 3.0);
+#endif
+}
+
+__global__ void compileDoublePrecisionIntrinsics(int) {
+    double_precision_intrinsics();
+}
+
+TEST_CASE("Unit_DoublePrecisionIntrinsics") {
+  hipLaunchKernelGGL(compileDoublePrecisionIntrinsics, dim3(1, 1, 1),
+                                             dim3(1, 1, 1), 0, 0, 1);
+}
diff --git a/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathDevice.cc b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathDevice.cc
index 9c695a7b41..df3b988ae6 100644
--- a/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathDevice.cc
+++ b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathDevice.cc
@@ -1,133 +1,133 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
-
-
-#pragma GCC diagnostic ignored "-Wall"
-#pragma clang diagnostic ignored "-Wunused-variable"
-
-__device__ void double_precision_math_functions() {
-    int iX;
-    double fX, fY;
-
-    acos(1.0);
-    acosh(1.0);
-    asin(0.0);
-    asinh(0.0);
-    atan(0.0);
-    atan2(0.0, 1.0);
-    atanh(0.0);
-    cbrt(0.0);
-    ceil(0.0);
-    copysign(1.0, -2.0);
-    cos(0.0);
-    cosh(0.0);
-    cospi(0.0);
-    cyl_bessel_i0(0.0);
-    cyl_bessel_i1(0.0);
-    erf(0.0);
-    erfc(0.0);
-    erfcinv(2.0);
-    erfcx(0.0);
-    erfinv(1.0);
-    exp(0.0);
-    exp10(0.0);
-    exp2(0.0);
-    expm1(0.0);
-    fabs(1.0);
-    fdim(1.0, 0.0);
-    floor(0.0);
-    fma(1.0, 2.0, 3.0);
-    fmax(0.0, 0.0);
-    fmin(0.0, 0.0);
-    fmod(0.0, 1.0);
-    frexp(0.0, &iX);
-    hypot(1.0, 0.0);
-    ilogb(1.0);
-    isfinite(0.0);
-    isinf(0.0);
-    isnan(0.0);
-    j0(0.0);
-    j1(0.0);
-    jn(-1.0, 1.0);
-    ldexp(0.0, 0);
-    lgamma(1.0);
-    llrint(0.0);
-    llround(0.0);
-    log(1.0);
-    log10(1.0);
-    log1p(-1.0);
-    log2(1.0);
-    logb(1.0);
-    lrint(0.0);
-    lround(0.0);
-    modf(0.0, &fX);
-    nan("1");
-    nearbyint(0.0);
-    nextafter(0.0, 0.0);
-    fX = 1.0;
-    norm(1, &fX);
-    norm3d(1.0, 0.0, 0.0);
-    norm4d(1.0, 0.0, 0.0, 0.0);
-    normcdf(0.0);
-    normcdfinv(1.0);
-    pow(1.0, 0.0);
-    rcbrt(1.0);
-    remainder(2.0, 1.0);
-    remquo(1.0, 2.0, &iX);
-    rhypot(0.0, 1.0);
-    rint(1.0);
-    fX = 1.0;
-    rnorm(1, &fX);
-    rnorm3d(0.0, 0.0, 1.0);
-    rnorm4d(0.0, 0.0, 0.0, 1.0);
-    round(0.0);
-    rsqrt(1.0);
-    scalbln(0.0, 1);
-    scalbn(0.0, 1);
-    signbit(1.0);
-    sin(0.0);
-#if HT_AMD
-    // NV A100 has a bug in sincos(), so temporarily disbale it
-    sincos(0.0, &fX, &fY);
-#endif
-    sincospi(0.0, &fX, &fY);
-    sinh(0.0);
-    sinpi(0.0);
-    sqrt(0.0);
-    tan(0.0);
-    tanh(0.0);
-    tgamma(2.0);
-    trunc(0.0);
-    y0(1.0);
-    y1(1.0);
-    yn(1, 1.0);
-}
-
-__global__ void compileDoublePrecisionMathOnDevice(int) {
-    double_precision_math_functions();
-}
-
-TEST_CASE("Unit_DoublePrecisionMathDevice") {
-  hipLaunchKernelGGL(compileDoublePrecisionMathOnDevice, dim3(1, 1, 1),
-                                               dim3(1, 1, 1), 0, 0, 1);
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+
+
+#pragma GCC diagnostic ignored "-Wall"
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+__device__ void double_precision_math_functions() {
+    int iX;
+    double fX, fY;
+
+    acos(1.0);
+    acosh(1.0);
+    asin(0.0);
+    asinh(0.0);
+    atan(0.0);
+    atan2(0.0, 1.0);
+    atanh(0.0);
+    cbrt(0.0);
+    ceil(0.0);
+    copysign(1.0, -2.0);
+    cos(0.0);
+    cosh(0.0);
+    cospi(0.0);
+    cyl_bessel_i0(0.0);
+    cyl_bessel_i1(0.0);
+    erf(0.0);
+    erfc(0.0);
+    erfcinv(2.0);
+    erfcx(0.0);
+    erfinv(1.0);
+    exp(0.0);
+    exp10(0.0);
+    exp2(0.0);
+    expm1(0.0);
+    fabs(1.0);
+    fdim(1.0, 0.0);
+    floor(0.0);
+    fma(1.0, 2.0, 3.0);
+    fmax(0.0, 0.0);
+    fmin(0.0, 0.0);
+    fmod(0.0, 1.0);
+    frexp(0.0, &iX);
+    hypot(1.0, 0.0);
+    ilogb(1.0);
+    isfinite(0.0);
+    isinf(0.0);
+    isnan(0.0);
+    j0(0.0);
+    j1(0.0);
+    jn(-1.0, 1.0);
+    ldexp(0.0, 0);
+    lgamma(1.0);
+    llrint(0.0);
+    llround(0.0);
+    log(1.0);
+    log10(1.0);
+    log1p(-1.0);
+    log2(1.0);
+    logb(1.0);
+    lrint(0.0);
+    lround(0.0);
+    modf(0.0, &fX);
+    nan("1");
+    nearbyint(0.0);
+    nextafter(0.0, 0.0);
+    fX = 1.0;
+    norm(1, &fX);
+    norm3d(1.0, 0.0, 0.0);
+    norm4d(1.0, 0.0, 0.0, 0.0);
+    normcdf(0.0);
+    normcdfinv(1.0);
+    pow(1.0, 0.0);
+    rcbrt(1.0);
+    remainder(2.0, 1.0);
+    remquo(1.0, 2.0, &iX);
+    rhypot(0.0, 1.0);
+    rint(1.0);
+    fX = 1.0;
+    rnorm(1, &fX);
+    rnorm3d(0.0, 0.0, 1.0);
+    rnorm4d(0.0, 0.0, 0.0, 1.0);
+    round(0.0);
+    rsqrt(1.0);
+    scalbln(0.0, 1);
+    scalbn(0.0, 1);
+    signbit(1.0);
+    sin(0.0);
+#if HT_AMD
+    // NV A100 has a bug in sincos(), so temporarily disbale it
+    sincos(0.0, &fX, &fY);
+#endif
+    sincospi(0.0, &fX, &fY);
+    sinh(0.0);
+    sinpi(0.0);
+    sqrt(0.0);
+    tan(0.0);
+    tanh(0.0);
+    tgamma(2.0);
+    trunc(0.0);
+    y0(1.0);
+    y1(1.0);
+    yn(1, 1.0);
+}
+
+__global__ void compileDoublePrecisionMathOnDevice(int) {
+    double_precision_math_functions();
+}
+
+TEST_CASE("Unit_DoublePrecisionMathDevice") {
+  hipLaunchKernelGGL(compileDoublePrecisionMathOnDevice, dim3(1, 1, 1),
+                                               dim3(1, 1, 1), 0, 0, 1);
+}
diff --git a/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathHost.cc b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathHost.cc
index fd4e4bf238..773d5eab0d 100644
--- a/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathHost.cc
+++ b/projects/hip-tests/catch/unit/deviceLib/DoublePrecisionMathHost.cc
@@ -1,117 +1,117 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_common.hh>
-#include <cmath>
-
-#pragma GCC diagnostic ignored "-Wall"
-#pragma clang diagnostic ignored "-Wunused-variable"
-
-__host__ static void double_precision_math_functions() {
-    int iX;
-    double fX, fY;
-
-    acos(1.0);
-    acosh(1.0);
-    asin(0.0);
-    asinh(0.0);
-    atan(0.0);
-    atan2(0.0, 1.0);
-    atanh(0.0);
-    cbrt(0.0);
-    ceil(0.0);
-    copysign(1.0, -2.0);
-    cos(0.0);
-    cosh(0.0);
-    erf(0.0);
-    erfc(0.0);
-    exp(0.0);
-    #ifdef __unix__
-    exp10(0.0);
-    #endif
-    exp2(0.0);
-    expm1(0.0);
-    fabs(1.0);
-    fdim(1.0, 0.0);
-    floor(0.0);
-    fma(1.0, 2.0, 3.0);
-    fmax(0.0, 0.0);
-    fmin(0.0, 0.0);
-    fmod(0.0, 1.0);
-    frexp(0.0, &iX);
-    hypot(1.0, 0.0);
-    ilogb(1.0);
-    std::isfinite(0.0);
-    std::isinf(0.0);
-    std::isnan(0.0);
-    #ifdef __unix__
-    j0(0.0);
-    j1(0.0);
-    jn(-1.0, 1.0);
-    #elif _WIN64
-    _j0(0.0);
-    _j1(0.0);
-    _jn(-1.0, 1.0);
-    #endif
-    ldexp(0.0, 0);
-    llrint(0.0);
-    llround(0.0);
-    log(1.0);
-    log10(1.0);
-    log1p(-1.0);
-    log2(1.0);
-    logb(1.0);
-    lrint(0.0);
-    lround(0.0);
-    modf(0.0, &fX);
-    nan("1");
-    nearbyint(0.0);
-    fX = 1.0;
-    pow(1.0, 0.0);
-    remainder(2.0, 1.0);
-    remquo(1.0, 2.0, &iX);
-    rint(1.0);
-    round(0.0);
-    scalbln(0.0, 1);
-    scalbn(0.0, 1);
-    std::signbit(1.0);
-    sin(0.0);
-    #ifdef _unix__
-    sincos(0.0, &fX, &fY);
-    #endif
-    sinh(0.0);
-    sqrt(0.0);
-    tan(0.0);
-    tanh(0.0);
-    tgamma(2.0);
-    trunc(0.0);
-    #ifdef __unix__
-    y0(1.0);
-    y1(1.0);
-    yn(1, 1.0);
-    #elif _WIN64
-    _y0(1.0);
-    _y1(1.0);
-    _yn(1, 1.0);
-    #endif
-}
-
-TEST_CASE("Unit_DoublePrecisionMathHost") {
-  double_precision_math_functions();
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+#include <cmath>
+
+#pragma GCC diagnostic ignored "-Wall"
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+__host__ static void double_precision_math_functions() {
+    int iX;
+    double fX, fY;
+
+    acos(1.0);
+    acosh(1.0);
+    asin(0.0);
+    asinh(0.0);
+    atan(0.0);
+    atan2(0.0, 1.0);
+    atanh(0.0);
+    cbrt(0.0);
+    ceil(0.0);
+    copysign(1.0, -2.0);
+    cos(0.0);
+    cosh(0.0);
+    erf(0.0);
+    erfc(0.0);
+    exp(0.0);
+    #ifdef __unix__
+    exp10(0.0);
+    #endif
+    exp2(0.0);
+    expm1(0.0);
+    fabs(1.0);
+    fdim(1.0, 0.0);
+    floor(0.0);
+    fma(1.0, 2.0, 3.0);
+    fmax(0.0, 0.0);
+    fmin(0.0, 0.0);
+    fmod(0.0, 1.0);
+    frexp(0.0, &iX);
+    hypot(1.0, 0.0);
+    ilogb(1.0);
+    std::isfinite(0.0);
+    std::isinf(0.0);
+    std::isnan(0.0);
+    #ifdef __unix__
+    j0(0.0);
+    j1(0.0);
+    jn(-1.0, 1.0);
+    #elif _WIN64
+    _j0(0.0);
+    _j1(0.0);
+    _jn(-1.0, 1.0);
+    #endif
+    ldexp(0.0, 0);
+    llrint(0.0);
+    llround(0.0);
+    log(1.0);
+    log10(1.0);
+    log1p(-1.0);
+    log2(1.0);
+    logb(1.0);
+    lrint(0.0);
+    lround(0.0);
+    modf(0.0, &fX);
+    nan("1");
+    nearbyint(0.0);
+    fX = 1.0;
+    pow(1.0, 0.0);
+    remainder(2.0, 1.0);
+    remquo(1.0, 2.0, &iX);
+    rint(1.0);
+    round(0.0);
+    scalbln(0.0, 1);
+    scalbn(0.0, 1);
+    std::signbit(1.0);
+    sin(0.0);
+    #ifdef _unix__
+    sincos(0.0, &fX, &fY);
+    #endif
+    sinh(0.0);
+    sqrt(0.0);
+    tan(0.0);
+    tanh(0.0);
+    tgamma(2.0);
+    trunc(0.0);
+    #ifdef __unix__
+    y0(1.0);
+    y1(1.0);
+    yn(1, 1.0);
+    #elif _WIN64
+    _y0(1.0);
+    _y1(1.0);
+    _yn(1, 1.0);
+    #endif
+}
+
+TEST_CASE("Unit_DoublePrecisionMathHost") {
+  double_precision_math_functions();
+}
diff --git a/projects/hip-tests/catch/unit/deviceLib/FloatMathPrecise.cc b/projects/hip-tests/catch/unit/deviceLib/FloatMathPrecise.cc
index 357f2ed918..698f6c144a 100644
--- a/projects/hip-tests/catch/unit/deviceLib/FloatMathPrecise.cc
+++ b/projects/hip-tests/catch/unit/deviceLib/FloatMathPrecise.cc
@@ -1,128 +1,128 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
-#include <hip/math_functions.h>
-
-__device__ void FloatMathPrecise() {
-    int iX;
-    float fX, fY;
-
-    acosf(1.0f);
-    acoshf(1.0f);
-    asinf(0.0f);
-    asinhf(0.0f);
-    atan2f(0.0f, 1.0f);
-    atanf(0.0f);
-    atanhf(0.0f);
-    cbrtf(0.0f);
-    fX = ceilf(0.0f);
-    fX = copysignf(1.0f, -2.0f);
-    cosf(0.0f);
-    coshf(0.0f);
-    cospif(0.0f);
-    cyl_bessel_i0f(0.0f);
-    cyl_bessel_i1f(0.0f);
-    erfcf(0.0f);
-    erfcinvf(2.0f);
-    erfcxf(0.0f);
-    erff(0.0f);
-    erfinvf(1.0f);
-    exp10f(0.0f);
-    exp2f(0.0f);
-    expf(0.0f);
-    expm1f(0.0f);
-    fX = fabsf(1.0f);
-    fdimf(1.0f, 0.0f);
-    fdividef(0.0f, 1.0f);
-    fX = floorf(0.0f);
-    fmaf(1.0f, 2.0f, 3.0f);
-    fX = fmaxf(0.0f, 0.0f);
-    fX = fminf(0.0f, 0.0f);
-    fmodf(0.0f, 1.0f);
-    frexpf(0.0f, &iX);
-    hypotf(1.0f, 0.0f);
-    ilogbf(1.0f);
-    isfinite(0.0f);
-    fX = isinf(0.0f);
-    fX = isnan(0.0f);
-    j0f(0.0f);
-    j1f(0.0f);
-    jnf(-1.0f, 1.0f);
-    ldexpf(0.0f, 0);
-    lgammaf(1.0f);
-    llrintf(0.0f);
-    llroundf(0.0f);
-    log10f(1.0f);
-    log1pf(-1.0f);
-    log2f(1.0f);
-    logbf(1.0f);
-    logf(1.0f);
-    lrintf(0.0f);
-    lroundf(0.0f);
-    modff(0.0f, &fX);
-    fX = nanf("1");
-    fX = nearbyintf(0.0f);
-    nextafterf(0.0f, 0.0f);
-    norm3df(1.0f, 0.0f, 0.0f);
-    norm4df(1.0f, 0.0f, 0.0f, 0.0f);
-    normcdff(0.0f);
-    normcdfinvf(1.0f);
-    fX = 1.0f;
-    normf(1, &fX);
-    powf(1.0f, 0.0f);
-    rcbrtf(1.0f);
-    remainderf(2.0f, 1.0f);
-    remquof(1.0f, 2.0f, &iX);
-    rhypotf(0.0f, 1.0f);
-    fY = rintf(1.0f);
-    rnorm3df(0.0f, 0.0f, 1.0f);
-    rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
-    fX = 1.0f;
-    rnormf(1, &fX);
-    fY = roundf(0.0f);
-    rsqrtf(1.0f);
-    scalblnf(0.0f, 1);
-    scalbnf(0.0f, 1);
-    signbit(1.0f);
-    sincosf(0.0f, &fX, &fY);
-    sincospif(0.0f, &fX, &fY);
-    sinf(0.0f);
-    sinhf(0.0f);
-    sinpif(0.0f);
-    sqrtf(0.0f);
-    tanf(0.0f);
-    tanhf(0.0f);
-    tgammaf(2.0f);
-    fY = truncf(0.0f);
-    y0f(1.0f);
-    y1f(1.0f);
-    ynf(1, 1.0f);
-}
-
-__global__ void CompileFloatMathPrecise(int) {
-  FloatMathPrecise();
-}
-
-TEST_CASE("Unit_FloatMathPrecise") {
-    hipLaunchKernelGGL(CompileFloatMathPrecise, dim3(1, 1, 1),
-                                      dim3(1, 1, 1), 0, 0, 1);
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+#include <hip/math_functions.h>
+
+__device__ void FloatMathPrecise() {
+    int iX;
+    float fX, fY;
+
+    acosf(1.0f);
+    acoshf(1.0f);
+    asinf(0.0f);
+    asinhf(0.0f);
+    atan2f(0.0f, 1.0f);
+    atanf(0.0f);
+    atanhf(0.0f);
+    cbrtf(0.0f);
+    fX = ceilf(0.0f);
+    fX = copysignf(1.0f, -2.0f);
+    cosf(0.0f);
+    coshf(0.0f);
+    cospif(0.0f);
+    cyl_bessel_i0f(0.0f);
+    cyl_bessel_i1f(0.0f);
+    erfcf(0.0f);
+    erfcinvf(2.0f);
+    erfcxf(0.0f);
+    erff(0.0f);
+    erfinvf(1.0f);
+    exp10f(0.0f);
+    exp2f(0.0f);
+    expf(0.0f);
+    expm1f(0.0f);
+    fX = fabsf(1.0f);
+    fdimf(1.0f, 0.0f);
+    fdividef(0.0f, 1.0f);
+    fX = floorf(0.0f);
+    fmaf(1.0f, 2.0f, 3.0f);
+    fX = fmaxf(0.0f, 0.0f);
+    fX = fminf(0.0f, 0.0f);
+    fmodf(0.0f, 1.0f);
+    frexpf(0.0f, &iX);
+    hypotf(1.0f, 0.0f);
+    ilogbf(1.0f);
+    isfinite(0.0f);
+    fX = isinf(0.0f);
+    fX = isnan(0.0f);
+    j0f(0.0f);
+    j1f(0.0f);
+    jnf(-1.0f, 1.0f);
+    ldexpf(0.0f, 0);
+    lgammaf(1.0f);
+    llrintf(0.0f);
+    llroundf(0.0f);
+    log10f(1.0f);
+    log1pf(-1.0f);
+    log2f(1.0f);
+    logbf(1.0f);
+    logf(1.0f);
+    lrintf(0.0f);
+    lroundf(0.0f);
+    modff(0.0f, &fX);
+    fX = nanf("1");
+    fX = nearbyintf(0.0f);
+    nextafterf(0.0f, 0.0f);
+    norm3df(1.0f, 0.0f, 0.0f);
+    norm4df(1.0f, 0.0f, 0.0f, 0.0f);
+    normcdff(0.0f);
+    normcdfinvf(1.0f);
+    fX = 1.0f;
+    normf(1, &fX);
+    powf(1.0f, 0.0f);
+    rcbrtf(1.0f);
+    remainderf(2.0f, 1.0f);
+    remquof(1.0f, 2.0f, &iX);
+    rhypotf(0.0f, 1.0f);
+    fY = rintf(1.0f);
+    rnorm3df(0.0f, 0.0f, 1.0f);
+    rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
+    fX = 1.0f;
+    rnormf(1, &fX);
+    fY = roundf(0.0f);
+    rsqrtf(1.0f);
+    scalblnf(0.0f, 1);
+    scalbnf(0.0f, 1);
+    signbit(1.0f);
+    sincosf(0.0f, &fX, &fY);
+    sincospif(0.0f, &fX, &fY);
+    sinf(0.0f);
+    sinhf(0.0f);
+    sinpif(0.0f);
+    sqrtf(0.0f);
+    tanf(0.0f);
+    tanhf(0.0f);
+    tgammaf(2.0f);
+    fY = truncf(0.0f);
+    y0f(1.0f);
+    y1f(1.0f);
+    ynf(1, 1.0f);
+}
+
+__global__ void CompileFloatMathPrecise(int) {
+  FloatMathPrecise();
+}
+
+TEST_CASE("Unit_FloatMathPrecise") {
+    hipLaunchKernelGGL(CompileFloatMathPrecise, dim3(1, 1, 1),
+                                      dim3(1, 1, 1), 0, 0, 1);
+}
diff --git a/projects/hip-tests/catch/unit/deviceLib/IntegerIntrinsics.cc b/projects/hip-tests/catch/unit/deviceLib/IntegerIntrinsics.cc
index 68009651bd..07b9343c1d 100644
--- a/projects/hip-tests/catch/unit/deviceLib/IntegerIntrinsics.cc
+++ b/projects/hip-tests/catch/unit/deviceLib/IntegerIntrinsics.cc
@@ -1,68 +1,68 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
-#include <hip/device_functions.h>
-#include <algorithm>
-
-#pragma GCC diagnostic ignored "-Wall"
-#pragma clang diagnostic ignored "-Wunused-variable"
-
-__device__ void integer_intrinsics() {
-    __brev((unsigned int)10);
-    __brevll((uint64_t)10);
-    __byte_perm((unsigned int)0, (unsigned int)0, 0);
-    __clz(static_cast<int>(10));
-    __clzll((int64_t)10);
-    __ffs(static_cast<int>(10));
-    __ffsll((long long)(10)); // NOLINT
-    __funnelshift_l((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
-    __funnelshift_lc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
-    __funnelshift_r((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
-    __funnelshift_rc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
-    __hadd(static_cast<int>(1), static_cast<int>(3));
-    __mul24(static_cast<int>(1), static_cast<int>(2));
-    __mul64hi((int64_t)1, (int64_t)2);
-    __mulhi(static_cast<int>(1), static_cast<int>(2));
-    __popc((unsigned int)4);
-    __popcll((uint64_t)4);
-    int a = min(static_cast<int>(4), static_cast<int>(5));
-    int b = max(static_cast<int>(4), static_cast<int>(5));
-    __rhadd(static_cast<int>(1), static_cast<int>(2));
-    __sad(static_cast<int>(1), static_cast<int>(2), 0);
-    __uhadd((unsigned int)1, (unsigned int)3);
-    __umul24((unsigned int)1, (unsigned int)2);
-    __umul64hi((uint64_t)1, (uint64_t)2);
-    __umulhi((unsigned int)1, (unsigned int)2);
-    __urhadd((unsigned int)1, (unsigned int)2);
-    __usad((unsigned int)1, (unsigned int)2, 0);
-
-    assert(1);
-}
-
-__global__ void compileIntegerIntrinsics(int) {
-  integer_intrinsics();
-}
-
-TEST_CASE("Unit_IntegerIntrinsics") {
-    hipLaunchKernelGGL(compileIntegerIntrinsics, dim3(1, 1, 1),
-                                       dim3(1, 1, 1), 0, 0, 1);
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+#include <hip/device_functions.h>
+#include <algorithm>
+
+#pragma GCC diagnostic ignored "-Wall"
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+__device__ void integer_intrinsics() {
+    __brev((unsigned int)10);
+    __brevll((uint64_t)10);
+    __byte_perm((unsigned int)0, (unsigned int)0, 0);
+    __clz(static_cast<int>(10));
+    __clzll((int64_t)10);
+    __ffs(static_cast<int>(10));
+    __ffsll((long long)(10)); // NOLINT
+    __funnelshift_l((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
+    __funnelshift_lc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
+    __funnelshift_r((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
+    __funnelshift_rc((unsigned int)0xfacefeed, (unsigned int)0xdeadbeef, 0);
+    __hadd(static_cast<int>(1), static_cast<int>(3));
+    __mul24(static_cast<int>(1), static_cast<int>(2));
+    __mul64hi((int64_t)1, (int64_t)2);
+    __mulhi(static_cast<int>(1), static_cast<int>(2));
+    __popc((unsigned int)4);
+    __popcll((uint64_t)4);
+    int a = min(static_cast<int>(4), static_cast<int>(5));
+    int b = max(static_cast<int>(4), static_cast<int>(5));
+    __rhadd(static_cast<int>(1), static_cast<int>(2));
+    __sad(static_cast<int>(1), static_cast<int>(2), 0);
+    __uhadd((unsigned int)1, (unsigned int)3);
+    __umul24((unsigned int)1, (unsigned int)2);
+    __umul64hi((uint64_t)1, (uint64_t)2);
+    __umulhi((unsigned int)1, (unsigned int)2);
+    __urhadd((unsigned int)1, (unsigned int)2);
+    __usad((unsigned int)1, (unsigned int)2, 0);
+
+    assert(1);
+}
+
+__global__ void compileIntegerIntrinsics(int) {
+  integer_intrinsics();
+}
+
+TEST_CASE("Unit_IntegerIntrinsics") {
+    hipLaunchKernelGGL(compileIntegerIntrinsics, dim3(1, 1, 1),
+                                       dim3(1, 1, 1), 0, 0, 1);
+}
diff --git a/projects/hip-tests/catch/unit/deviceLib/SimpleAtomicsTest.cc b/projects/hip-tests/catch/unit/deviceLib/SimpleAtomicsTest.cc
index 927344af36..89521611a0 100644
--- a/projects/hip-tests/catch/unit/deviceLib/SimpleAtomicsTest.cc
+++ b/projects/hip-tests/catch/unit/deviceLib/SimpleAtomicsTest.cc
@@ -1,298 +1,298 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <string.h>
-#include <math.h>
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
-
-#include <algorithm>
-#include <type_traits>
-
-using namespace std;
-////////////////////////////////////////////////////////////////////////////////
-// Auto-Verification Code
-////////////////////////////////////////////////////////////////////////////////
-
-bool verifyBitwise(...) {
-    return true;
-}
-
-template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
-bool verifyBitwise(T* gpuData, int len) {
-  // Atomic and
-  T val = 0xff;
-  for (int i = 0; i < len; ++i) {
-    // 9th element should be 1
-    val &= (2 * i + 7);
-  }
-  REQUIRE(val == gpuData[8]);
-
-  // atomic Or
-  val = 0;
-  for (int i = 0; i < len; ++i) {
-    // 10th element should be 0xff
-    val |= (1 << i);
-  }
-  REQUIRE(val == gpuData[9]);
-
-  // atomic Xor
-  val = 0xff;
-
-  for (int i = 0; i < len; ++i) {
-    // 11th element should be 0xff
-    val ^= i;
-  }
-
-  REQUIRE(val == gpuData[10]);
-  return true;
-}
-
-bool verifySub(...) {
-  return true;
-}
-
-template<
-    typename T,
-    typename enable_if<
-        is_same<T, int>{} || is_same<T, unsigned int>{}>::type* = nullptr>
-bool verifySub(T* gpuData, int len) {
-  T val = 0;
-
-  for (int i = 0; i < len; ++i) {
-      val -= 10;
-  }
-
-  REQUIRE(val == gpuData[1]);
-  return true;
-}
-
-bool verifyExch(...) {
-  return true;
-}
-
-template<typename T, typename enable_if<!is_same<T, double> {}>::type* = nullptr> // NOLINT
-bool computeExchExch(T* gpuData, int len) {
-  T val = 0;
-
-  for (T i = 0; i < len; ++i) {
-      if (i == gpuData[2]) {
-          return true;
-          break;
-      }
-  }
-}
-
-bool VerifyIntegral(...) {
-  return true;
-}
-
-template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
-bool VerifyIntegral(T* gpuData, int len) {
-  // atomic Max
-  T val = 0;
-  for (int i = 0; i < len; ++i) {
-    // fourth element should be len-1
-    val = max(val, static_cast<T>(i));
-  }
-
-  REQUIRE(val == gpuData[3]);
-
-  // atomic Min
-  val = 1 << 8;
-
-  for (int i = 0; i < len; ++i) {
-      val = min(val, static_cast<T>(i));
-  }
-
-  REQUIRE(val == gpuData[4]);
-
-  // atomic Inc
-  T limit = 17;
-  val = 0;
-
-  for (int i = 0; i < len; ++i) {
-      val = (val >= limit) ? 0 : val + 1;
-  }
-
-  REQUIRE(val == gpuData[5]);
-
-  // atomic Dec
-  limit = 137;
-  val = 0;
-
-  for (int i = 0; i < len; ++i) {
-      val = ((val == 0) || (val > limit)) ? limit : val - 1;
-  }
-
-  REQUIRE(val == gpuData[6]);
-
-  // atomic CAS
-  for (int i = 0; i < len; ++i) {
-    // eighth element should be a member of [0, len)
-    if (static_cast<T>(i) == gpuData[7]) {
-      return true;
-      break;
-    }
-  }
-  return verifyBitwise(gpuData, len) && verifySub(gpuData, len);
-}
-
-template<typename T>
-bool verifyData(T* gpuData, int len) {
-  T val = 0;
-  for (int i = 0; i < len; ++i) {
-      val += 10;
-  }
-
-  REQUIRE(val == gpuData[0]);
-  return VerifyIntegral(gpuData, len) && verifyExch(gpuData, len);
-}
-
-__device__
-void testKernelExch(...) {}
-
-template<typename T, typename enable_if<!is_same<T, double>{}>::type* = nullptr>
-__device__
-void testKernelExch(T* g_odata) {
-  // access thread id
-  const T tid = blockDim.x * blockIdx.x + threadIdx.x;
-
-  // Atomic exchange
-  atomicExch(&g_odata[2], tid);
-}
-
-__device__
-void testKernelSub(...) {}
-
-template<
-    typename T,
-    typename enable_if<
-        is_same<T, int>{} || is_same<T, unsigned int>{}>::type* = nullptr>
-__device__
-void testKernelSub(T* g_odata) {
-    // Atomic subtraction (final should be 0)
-    atomicSub(&g_odata[1], 10);
-}
-
-__device__
-void testKernelIntegral(...) {}
-
-template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
-__device__
-void testKernelIntegral(T* g_odata) {
-  // access thread id
-  const T tid = blockDim.x * blockIdx.x + threadIdx.x;
-
-  // Atomic maximum
-  atomicMax(&g_odata[3], tid);
-
-  // Atomic minimum
-  atomicMin(&g_odata[4], tid);
-
-  // Atomic increment (modulo 17+1)
-  atomicInc((unsigned int*)&g_odata[5], 17);
-
-  // Atomic decrement
-  atomicDec((unsigned int*)&g_odata[6], 137);
-
-  // Atomic compare-and-swap
-  atomicCAS(&g_odata[7], tid - 1, tid);
-
-  // Bitwise atomic instructions
-
-  // Atomic AND
-  atomicAnd(&g_odata[8], 2 * tid + 7);
-
-  // Atomic OR
-  atomicOr(&g_odata[9], 1 << tid);
-
-  // Atomic XOR
-  atomicXor(&g_odata[10], tid);
-
-  testKernelSub(g_odata);
-}
-
-template<typename T>
-__global__ void testKernel(T* g_odata) {
-    // Atomic addition
-    atomicAdd(&g_odata[0], 10);
-    testKernelIntegral(g_odata);
-    testKernelExch(g_odata);
-}
-
-template<typename T>
-static void runTest() {
-  bool testResult = true;
-  unsigned int numThreads = 256;
-  unsigned int numBlocks = 64;
-  unsigned int numData = 11;
-  unsigned int memSize = sizeof(T) * numData;
-
-  // allocate mem for the result on host side
-  T* hOData = reinterpret_cast<T*>(malloc(memSize));
-
-  // initialize the memory
-  for (unsigned int i = 0; i < numData; i++) {
-    hOData[i] = 0;
-  }
-  // To make the AND and XOR tests generate something other than 0...
-  hOData[8] = hOData[10] = 0xff;
-
-  // allocate device memory for result
-  T* dOData;
-  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dOData), memSize));
-  // copy host memory to device to initialize to zero
-  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
-
-  // execute the kernel
-  hipLaunchKernelGGL(
-      testKernel, dim3(numBlocks), dim3(numThreads), 0, 0, dOData);
-
-  // Copy result from device to host
-  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
-
-  // Compute reference solution
-  REQUIRE(testResult == verifyData(hOData, numThreads * numBlocks));
-
-  // Cleanup memory
-  free(hOData);
-  HIP_CHECK(hipFree(dOData));
-}
-
-TEST_CASE("Unit_SimpleAtomicsTest") {
-  SECTION("test for int") {
-    runTest<int>();
-  }
-  SECTION("test for unsigned int") {
-    runTest<unsigned int>();
-  }
-  SECTION("test for float") {
-    runTest<float>();
-  }
-  #if HT_AMD
-  SECTION("test for unsigned long long") {
-    runTest<uint64_t>();
-  }
-  SECTION("test for double") {
-    runTest<double>();
-  }
-  #endif
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <string.h>
+#include <math.h>
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+
+#include <algorithm>
+#include <type_traits>
+
+using namespace std;
+////////////////////////////////////////////////////////////////////////////////
+// Auto-Verification Code
+////////////////////////////////////////////////////////////////////////////////
+
+bool verifyBitwise(...) {
+    return true;
+}
+
+template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
+bool verifyBitwise(T* gpuData, int len) {
+  // Atomic and
+  T val = 0xff;
+  for (int i = 0; i < len; ++i) {
+    // 9th element should be 1
+    val &= (2 * i + 7);
+  }
+  REQUIRE(val == gpuData[8]);
+
+  // atomic Or
+  val = 0;
+  for (int i = 0; i < len; ++i) {
+    // 10th element should be 0xff
+    val |= (1 << i);
+  }
+  REQUIRE(val == gpuData[9]);
+
+  // atomic Xor
+  val = 0xff;
+
+  for (int i = 0; i < len; ++i) {
+    // 11th element should be 0xff
+    val ^= i;
+  }
+
+  REQUIRE(val == gpuData[10]);
+  return true;
+}
+
+bool verifySub(...) {
+  return true;
+}
+
+template<
+    typename T,
+    typename enable_if<
+        is_same<T, int>{} || is_same<T, unsigned int>{}>::type* = nullptr>
+bool verifySub(T* gpuData, int len) {
+  T val = 0;
+
+  for (int i = 0; i < len; ++i) {
+      val -= 10;
+  }
+
+  REQUIRE(val == gpuData[1]);
+  return true;
+}
+
+bool verifyExch(...) {
+  return true;
+}
+
+template<typename T, typename enable_if<!is_same<T, double> {}>::type* = nullptr> // NOLINT
+bool computeExchExch(T* gpuData, int len) {
+  T val = 0;
+
+  for (T i = 0; i < len; ++i) {
+      if (i == gpuData[2]) {
+          return true;
+          break;
+      }
+  }
+}
+
+bool VerifyIntegral(...) {
+  return true;
+}
+
+template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
+bool VerifyIntegral(T* gpuData, int len) {
+  // atomic Max
+  T val = 0;
+  for (int i = 0; i < len; ++i) {
+    // fourth element should be len-1
+    val = max(val, static_cast<T>(i));
+  }
+
+  REQUIRE(val == gpuData[3]);
+
+  // atomic Min
+  val = 1 << 8;
+
+  for (int i = 0; i < len; ++i) {
+      val = min(val, static_cast<T>(i));
+  }
+
+  REQUIRE(val == gpuData[4]);
+
+  // atomic Inc
+  T limit = 17;
+  val = 0;
+
+  for (int i = 0; i < len; ++i) {
+      val = (val >= limit) ? 0 : val + 1;
+  }
+
+  REQUIRE(val == gpuData[5]);
+
+  // atomic Dec
+  limit = 137;
+  val = 0;
+
+  for (int i = 0; i < len; ++i) {
+      val = ((val == 0) || (val > limit)) ? limit : val - 1;
+  }
+
+  REQUIRE(val == gpuData[6]);
+
+  // atomic CAS
+  for (int i = 0; i < len; ++i) {
+    // eighth element should be a member of [0, len)
+    if (static_cast<T>(i) == gpuData[7]) {
+      return true;
+      break;
+    }
+  }
+  return verifyBitwise(gpuData, len) && verifySub(gpuData, len);
+}
+
+template<typename T>
+bool verifyData(T* gpuData, int len) {
+  T val = 0;
+  for (int i = 0; i < len; ++i) {
+      val += 10;
+  }
+
+  REQUIRE(val == gpuData[0]);
+  return VerifyIntegral(gpuData, len) && verifyExch(gpuData, len);
+}
+
+__device__
+void testKernelExch(...) {}
+
+template<typename T, typename enable_if<!is_same<T, double>{}>::type* = nullptr>
+__device__
+void testKernelExch(T* g_odata) {
+  // access thread id
+  const T tid = blockDim.x * blockIdx.x + threadIdx.x;
+
+  // Atomic exchange
+  atomicExch(&g_odata[2], tid);
+}
+
+__device__
+void testKernelSub(...) {}
+
+template<
+    typename T,
+    typename enable_if<
+        is_same<T, int>{} || is_same<T, unsigned int>{}>::type* = nullptr>
+__device__
+void testKernelSub(T* g_odata) {
+    // Atomic subtraction (final should be 0)
+    atomicSub(&g_odata[1], 10);
+}
+
+__device__
+void testKernelIntegral(...) {}
+
+template<typename T, typename enable_if<is_integral<T>{}>::type* = nullptr>
+__device__
+void testKernelIntegral(T* g_odata) {
+  // access thread id
+  const T tid = blockDim.x * blockIdx.x + threadIdx.x;
+
+  // Atomic maximum
+  atomicMax(&g_odata[3], tid);
+
+  // Atomic minimum
+  atomicMin(&g_odata[4], tid);
+
+  // Atomic increment (modulo 17+1)
+  atomicInc((unsigned int*)&g_odata[5], 17);
+
+  // Atomic decrement
+  atomicDec((unsigned int*)&g_odata[6], 137);
+
+  // Atomic compare-and-swap
+  atomicCAS(&g_odata[7], tid - 1, tid);
+
+  // Bitwise atomic instructions
+
+  // Atomic AND
+  atomicAnd(&g_odata[8], 2 * tid + 7);
+
+  // Atomic OR
+  atomicOr(&g_odata[9], 1 << tid);
+
+  // Atomic XOR
+  atomicXor(&g_odata[10], tid);
+
+  testKernelSub(g_odata);
+}
+
+template<typename T>
+__global__ void testKernel(T* g_odata) {
+    // Atomic addition
+    atomicAdd(&g_odata[0], 10);
+    testKernelIntegral(g_odata);
+    testKernelExch(g_odata);
+}
+
+template<typename T>
+static void runTest() {
+  bool testResult = true;
+  unsigned int numThreads = 256;
+  unsigned int numBlocks = 64;
+  unsigned int numData = 11;
+  unsigned int memSize = sizeof(T) * numData;
+
+  // allocate mem for the result on host side
+  T* hOData = reinterpret_cast<T*>(malloc(memSize));
+
+  // initialize the memory
+  for (unsigned int i = 0; i < numData; i++) {
+    hOData[i] = 0;
+  }
+  // To make the AND and XOR tests generate something other than 0...
+  hOData[8] = hOData[10] = 0xff;
+
+  // allocate device memory for result
+  T* dOData;
+  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&dOData), memSize));
+  // copy host memory to device to initialize to zero
+  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
+
+  // execute the kernel
+  hipLaunchKernelGGL(
+      testKernel, dim3(numBlocks), dim3(numThreads), 0, 0, dOData);
+
+  // Copy result from device to host
+  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
+
+  // Compute reference solution
+  REQUIRE(testResult == verifyData(hOData, numThreads * numBlocks));
+
+  // Cleanup memory
+  free(hOData);
+  HIP_CHECK(hipFree(dOData));
+}
+
+TEST_CASE("Unit_SimpleAtomicsTest") {
+  SECTION("test for int") {
+    runTest<int>();
+  }
+  SECTION("test for unsigned int") {
+    runTest<unsigned int>();
+  }
+  SECTION("test for float") {
+    runTest<float>();
+  }
+  #if HT_AMD
+  SECTION("test for unsigned long long") {
+    runTest<uint64_t>();
+  }
+  SECTION("test for double") {
+    runTest<double>();
+  }
+  #endif
+}
diff --git a/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionIntrinsics.cc b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionIntrinsics.cc
index fb8bebdaa5..182500e833 100644
--- a/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionIntrinsics.cc
+++ b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionIntrinsics.cc
@@ -1,101 +1,101 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
-#include <hip/device_functions.h>
-
-#pragma GCC diagnostic ignored "-Wall"
-#pragma clang diagnostic ignored "-Wunused-variable"
-
-__device__ void single_precision_intrinsics() {
-    float fX, fY;
-
-    __cosf(0.0f);
-    __exp10f(0.0f);
-    __expf(0.0f);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __fadd_rd(0.0f, 1.0f);
-#endif
-    __fadd_rn(0.0f, 1.0f);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __fadd_ru(0.0f, 1.0f);
-    __fadd_rz(0.0f, 1.0f);
-    __fdiv_rd(4.0f, 2.0f);
-#endif
-    __fdiv_rn(4.0f, 2.0f);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __fdiv_ru(4.0f, 2.0f);
-    __fdiv_rz(4.0f, 2.0f);
-#endif
-    __fdividef(4.0f, 2.0f);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __fmaf_rd(1.0f, 2.0f, 3.0f);
-#endif
-    __fmaf_rn(1.0f, 2.0f, 3.0f);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __fmaf_ru(1.0f, 2.0f, 3.0f);
-    __fmaf_rz(1.0f, 2.0f, 3.0f);
-    __fmul_rd(1.0f, 2.0f);
-#endif
-    __fmul_rn(1.0f, 2.0f);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __fmul_ru(1.0f, 2.0f);
-    __fmul_rz(1.0f, 2.0f);
-    __frcp_rd(2.0f);
-#endif
-    __frcp_rn(2.0f);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __frcp_ru(2.0f);
-    __frcp_rz(2.0f);
-#endif
-    __frsqrt_rn(4.0f);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __fsqrt_rd(4.0f);
-#endif
-    __fsqrt_rn(4.0f);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __fsqrt_ru(4.0f);
-    __fsqrt_rz(4.0f);
-    __fsub_rd(2.0f, 1.0f);
-#endif
-    __fsub_rn(2.0f, 1.0f);
-#if defined OCML_BASIC_ROUNDED_OPERATIONS
-    __fsub_ru(2.0f, 1.0f);
-    __fsub_rz(2.0f, 1.0f);
-#endif
-    __log10f(1.0f);
-    __log2f(1.0f);
-    __logf(1.0f);
-    __powf(1.0f, 0.0f);
-    __saturatef(0.1f);
-    __sincosf(0.0f, &fX, &fY);
-    __sinf(0.0f);
-    __tanf(0.0f);
-}
-
-__global__ void compileSinglePrecisionIntrinsics(int) {
-    single_precision_intrinsics();
-}
-
-TEST_CASE("Unit_SinglePrecisionIntrinsics") {
-    hipLaunchKernelGGL(compileSinglePrecisionIntrinsics, dim3(1, 1, 1),
-                                               dim3(1, 1, 1), 0, 0, 1);
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+#include <hip/device_functions.h>
+
+#pragma GCC diagnostic ignored "-Wall"
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+__device__ void single_precision_intrinsics() {
+    float fX, fY;
+
+    __cosf(0.0f);
+    __exp10f(0.0f);
+    __expf(0.0f);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __fadd_rd(0.0f, 1.0f);
+#endif
+    __fadd_rn(0.0f, 1.0f);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __fadd_ru(0.0f, 1.0f);
+    __fadd_rz(0.0f, 1.0f);
+    __fdiv_rd(4.0f, 2.0f);
+#endif
+    __fdiv_rn(4.0f, 2.0f);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __fdiv_ru(4.0f, 2.0f);
+    __fdiv_rz(4.0f, 2.0f);
+#endif
+    __fdividef(4.0f, 2.0f);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __fmaf_rd(1.0f, 2.0f, 3.0f);
+#endif
+    __fmaf_rn(1.0f, 2.0f, 3.0f);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __fmaf_ru(1.0f, 2.0f, 3.0f);
+    __fmaf_rz(1.0f, 2.0f, 3.0f);
+    __fmul_rd(1.0f, 2.0f);
+#endif
+    __fmul_rn(1.0f, 2.0f);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __fmul_ru(1.0f, 2.0f);
+    __fmul_rz(1.0f, 2.0f);
+    __frcp_rd(2.0f);
+#endif
+    __frcp_rn(2.0f);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __frcp_ru(2.0f);
+    __frcp_rz(2.0f);
+#endif
+    __frsqrt_rn(4.0f);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __fsqrt_rd(4.0f);
+#endif
+    __fsqrt_rn(4.0f);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __fsqrt_ru(4.0f);
+    __fsqrt_rz(4.0f);
+    __fsub_rd(2.0f, 1.0f);
+#endif
+    __fsub_rn(2.0f, 1.0f);
+#if defined OCML_BASIC_ROUNDED_OPERATIONS
+    __fsub_ru(2.0f, 1.0f);
+    __fsub_rz(2.0f, 1.0f);
+#endif
+    __log10f(1.0f);
+    __log2f(1.0f);
+    __logf(1.0f);
+    __powf(1.0f, 0.0f);
+    __saturatef(0.1f);
+    __sincosf(0.0f, &fX, &fY);
+    __sinf(0.0f);
+    __tanf(0.0f);
+}
+
+__global__ void compileSinglePrecisionIntrinsics(int) {
+    single_precision_intrinsics();
+}
+
+TEST_CASE("Unit_SinglePrecisionIntrinsics") {
+    hipLaunchKernelGGL(compileSinglePrecisionIntrinsics, dim3(1, 1, 1),
+                                               dim3(1, 1, 1), 0, 0, 1);
+}
diff --git a/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathDevice.cc b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathDevice.cc
index e7bbdc180e..bc63b88c13 100644
--- a/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathDevice.cc
+++ b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathDevice.cc
@@ -1,123 +1,123 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
-#include <hip/math_functions.h>
-
-#pragma GCC diagnostic ignored "-Wall"
-#pragma clang diagnostic ignored "-Wunused-variable"
-
-__device__ void single_precision_math_functions() {
-    int iX;
-    float fX, fY;
-
-    acosf(1.0f);
-    acoshf(1.0f);
-    asinf(0.0f);
-    asinhf(0.0f);
-    atan2f(0.0f, 1.0f);
-    atanf(0.0f);
-    atanhf(0.0f);
-    cbrtf(0.0f);
-    ceilf(0.0f);
-    copysignf(1.0f, -2.0f);
-    cosf(0.0f);
-    coshf(0.0f);
-    cospif(0.0f);
-    erfcf(0.0f);
-    erfcinvf(2.0f);
-    erfcxf(0.0f);
-    erff(0.0f);
-    erfinvf(1.0f);
-    exp10f(0.0f);
-    exp2f(0.0f);
-    expf(0.0f);
-    expm1f(0.0f);
-    fabsf(1.0f);
-    fdimf(1.0f, 0.0f);
-    fdividef(0.0f, 1.0f);
-    floorf(0.0f);
-    fmaf(1.0f, 2.0f, 3.0f);
-    fmaxf(0.0f, 0.0f);
-    fminf(0.0f, 0.0f);
-    fmodf(0.0f, 1.0f);
-    frexpf(0.0f, &iX);
-    hypotf(1.0f, 0.0f);
-    ilogbf(1.0f);
-    isfinite(0.0f);
-    isinf(0.0f);
-    isnan(0.0f);
-    j0f(0.0f);
-    j1f(0.0f);
-    jnf(-1.0f, 1.0f);
-    ldexpf(0.0f, 0);
-    llrintf(0.0f);
-    llroundf(0.0f);
-    log10f(1.0f);
-    log1pf(-1.0f);
-    log2f(1.0f);
-    logbf(1.0f);
-    logf(1.0f);
-    lrintf(0.0f);
-    lroundf(0.0f);
-    nanf("1");
-    nearbyintf(0.0f);
-    norm3df(1.0f, 0.0f, 0.0f);
-    norm4df(1.0f, 0.0f, 0.0f, 0.0f);
-    normcdff(0.0f);
-    normcdfinvf(1.0f);
-    fX = 1.0f;
-    normf(1, &fX);
-    powf(1.0f, 0.0f);
-    remainderf(2.0f, 1.0f);
-    rhypotf(0.0f, 1.0f);
-    rintf(1.0f);
-    rnorm3df(0.0f, 0.0f, 1.0f);
-    rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
-    fX = 1.0f;
-    rnormf(1, &fX);
-    roundf(0.0f);
-    rsqrtf(1.0f);
-    signbit(1.0f);
-    sincosf(0.0f, &fX, &fY);
-    sincospif(0.0f, &fX, &fY);
-    sinf(0.0f);
-    sinhf(0.0f);
-    sinpif(0.0f);
-    sqrtf(0.0f);
-    tanf(0.0f);
-    tanhf(0.0f);
-    tgammaf(2.0f);
-    truncf(0.0f);
-    y0f(1.0f);
-    y1f(1.0f);
-    ynf(1, 1.0f);
-}
-
-__global__ void compileSinglePrecisionMathOnDevice(int) {
-    single_precision_math_functions();
-}
-
-TEST_CASE("Unit_SinglePrecisionMathDevice") {
-    hipLaunchKernelGGL(compileSinglePrecisionMathOnDevice, dim3(1, 1, 1),
-                                                 dim3(1, 1, 1), 0, 0, 1);
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+#include <hip/math_functions.h>
+
+#pragma GCC diagnostic ignored "-Wall"
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+__device__ void single_precision_math_functions() {
+    int iX;
+    float fX, fY;
+
+    acosf(1.0f);
+    acoshf(1.0f);
+    asinf(0.0f);
+    asinhf(0.0f);
+    atan2f(0.0f, 1.0f);
+    atanf(0.0f);
+    atanhf(0.0f);
+    cbrtf(0.0f);
+    ceilf(0.0f);
+    copysignf(1.0f, -2.0f);
+    cosf(0.0f);
+    coshf(0.0f);
+    cospif(0.0f);
+    erfcf(0.0f);
+    erfcinvf(2.0f);
+    erfcxf(0.0f);
+    erff(0.0f);
+    erfinvf(1.0f);
+    exp10f(0.0f);
+    exp2f(0.0f);
+    expf(0.0f);
+    expm1f(0.0f);
+    fabsf(1.0f);
+    fdimf(1.0f, 0.0f);
+    fdividef(0.0f, 1.0f);
+    floorf(0.0f);
+    fmaf(1.0f, 2.0f, 3.0f);
+    fmaxf(0.0f, 0.0f);
+    fminf(0.0f, 0.0f);
+    fmodf(0.0f, 1.0f);
+    frexpf(0.0f, &iX);
+    hypotf(1.0f, 0.0f);
+    ilogbf(1.0f);
+    isfinite(0.0f);
+    isinf(0.0f);
+    isnan(0.0f);
+    j0f(0.0f);
+    j1f(0.0f);
+    jnf(-1.0f, 1.0f);
+    ldexpf(0.0f, 0);
+    llrintf(0.0f);
+    llroundf(0.0f);
+    log10f(1.0f);
+    log1pf(-1.0f);
+    log2f(1.0f);
+    logbf(1.0f);
+    logf(1.0f);
+    lrintf(0.0f);
+    lroundf(0.0f);
+    nanf("1");
+    nearbyintf(0.0f);
+    norm3df(1.0f, 0.0f, 0.0f);
+    norm4df(1.0f, 0.0f, 0.0f, 0.0f);
+    normcdff(0.0f);
+    normcdfinvf(1.0f);
+    fX = 1.0f;
+    normf(1, &fX);
+    powf(1.0f, 0.0f);
+    remainderf(2.0f, 1.0f);
+    rhypotf(0.0f, 1.0f);
+    rintf(1.0f);
+    rnorm3df(0.0f, 0.0f, 1.0f);
+    rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
+    fX = 1.0f;
+    rnormf(1, &fX);
+    roundf(0.0f);
+    rsqrtf(1.0f);
+    signbit(1.0f);
+    sincosf(0.0f, &fX, &fY);
+    sincospif(0.0f, &fX, &fY);
+    sinf(0.0f);
+    sinhf(0.0f);
+    sinpif(0.0f);
+    sqrtf(0.0f);
+    tanf(0.0f);
+    tanhf(0.0f);
+    tgammaf(2.0f);
+    truncf(0.0f);
+    y0f(1.0f);
+    y1f(1.0f);
+    ynf(1, 1.0f);
+}
+
+__global__ void compileSinglePrecisionMathOnDevice(int) {
+    single_precision_math_functions();
+}
+
+TEST_CASE("Unit_SinglePrecisionMathDevice") {
+    hipLaunchKernelGGL(compileSinglePrecisionMathOnDevice, dim3(1, 1, 1),
+                                                 dim3(1, 1, 1), 0, 0, 1);
+}
diff --git a/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathHost.cc b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathHost.cc
index 85407560cb..bceaeadb27 100644
--- a/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathHost.cc
+++ b/projects/hip-tests/catch/unit/deviceLib/SinglePrecisionMathHost.cc
@@ -1,113 +1,113 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_common.hh>
-#include <cmath>
-
-#pragma GCC diagnostic ignored "-Wall"
-#pragma clang diagnostic ignored "-Wunused-variable"
-
-__host__ static void single_precision_math_functions() {
-    int iX;
-    float fX, fY;
-
-    acosf(1.0f);
-    acoshf(1.0f);
-    asinf(0.0f);
-    asinhf(0.0f);
-    atan2f(0.0f, 1.0f);
-    atanf(0.0f);
-    atanhf(0.0f);
-    cbrtf(0.0f);
-    ceilf(0.0f);
-    copysignf(1.0f, -2.0f);
-    cosf(0.0f);
-    coshf(0.0f);
-    erfcf(0.0f);
-    erff(0.0f);
-    #ifdef __unix__
-    exp10f(0.0f);
-    #endif
-    exp2f(0.0f);
-    expf(0.0f);
-    expm1f(0.0f);
-    fabsf(1.0f);
-    fdimf(1.0f, 0.0f);
-    floorf(0.0f);
-    fmaf(1.0f, 2.0f, 3.0f);
-    fmaxf(0.0f, 0.0f);
-    fminf(0.0f, 0.0f);
-    fmodf(0.0f, 1.0f);
-    frexpf(0.0f, &iX);
-    hypotf(1.0f, 0.0f);
-    ilogbf(1.0f);
-    std::isfinite(0.0f);
-    std::isinf(0.0f);
-    std::isnan(0.0f);
-    #ifdef __unix__
-    j0f(0.0f);
-    j1f(0.0f);
-    jnf(-1.0f, 1.0f);
-    #endif
-    ldexpf(0.0f, 0);
-    lgammaf(1.0f);
-    llrintf(0.0f);
-    llroundf(0.0f);
-    log10f(1.0f);
-    log1pf(-1.0f);
-    log2f(1.0f);
-    logbf(1.0f);
-    logf(1.0f);
-    lrintf(0.0f);
-    lroundf(0.0f);
-    modff(0.0f, &fX);
-    nanf("1");
-    nearbyintf(0.0f);
-    powf(1.0f, 0.0f);
-    remainderf(2.0f, 1.0f);
-    remquof(1.0f, 2.0f, &iX);
-    rintf(1.0f);
-#if HT_AMD
-    fX = 1.0f;
-#endif
-    roundf(0.0f);
-    /// rsqrtf(1.0f);
-    scalblnf(0.0f, 1);
-    scalbnf(0.0f, 1);
-    std::signbit(1.0f);
-    #ifdef __unix__
-    sincosf(0.0f, &fX, &fY);
-    #endif
-    sinf(0.0f);
-    sinhf(0.0f);
-    sqrtf(0.0f);
-    tanf(0.0f);
-    tanhf(0.0f);
-    tgammaf(2.0f);
-    truncf(0.0f);
-    #ifdef __unix__
-    y0f(1.0f);
-    y1f(1.0f);
-    ynf(1, 1.0f);
-    #endif
-}
-
-TEST_CASE("Unit_SinglePrecisionMathHost") {
-  single_precision_math_functions();
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+#include <cmath>
+
+#pragma GCC diagnostic ignored "-Wall"
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+__host__ static void single_precision_math_functions() {
+    int iX;
+    float fX, fY;
+
+    acosf(1.0f);
+    acoshf(1.0f);
+    asinf(0.0f);
+    asinhf(0.0f);
+    atan2f(0.0f, 1.0f);
+    atanf(0.0f);
+    atanhf(0.0f);
+    cbrtf(0.0f);
+    ceilf(0.0f);
+    copysignf(1.0f, -2.0f);
+    cosf(0.0f);
+    coshf(0.0f);
+    erfcf(0.0f);
+    erff(0.0f);
+    #ifdef __unix__
+    exp10f(0.0f);
+    #endif
+    exp2f(0.0f);
+    expf(0.0f);
+    expm1f(0.0f);
+    fabsf(1.0f);
+    fdimf(1.0f, 0.0f);
+    floorf(0.0f);
+    fmaf(1.0f, 2.0f, 3.0f);
+    fmaxf(0.0f, 0.0f);
+    fminf(0.0f, 0.0f);
+    fmodf(0.0f, 1.0f);
+    frexpf(0.0f, &iX);
+    hypotf(1.0f, 0.0f);
+    ilogbf(1.0f);
+    std::isfinite(0.0f);
+    std::isinf(0.0f);
+    std::isnan(0.0f);
+    #ifdef __unix__
+    j0f(0.0f);
+    j1f(0.0f);
+    jnf(-1.0f, 1.0f);
+    #endif
+    ldexpf(0.0f, 0);
+    lgammaf(1.0f);
+    llrintf(0.0f);
+    llroundf(0.0f);
+    log10f(1.0f);
+    log1pf(-1.0f);
+    log2f(1.0f);
+    logbf(1.0f);
+    logf(1.0f);
+    lrintf(0.0f);
+    lroundf(0.0f);
+    modff(0.0f, &fX);
+    nanf("1");
+    nearbyintf(0.0f);
+    powf(1.0f, 0.0f);
+    remainderf(2.0f, 1.0f);
+    remquof(1.0f, 2.0f, &iX);
+    rintf(1.0f);
+#if HT_AMD
+    fX = 1.0f;
+#endif
+    roundf(0.0f);
+    /// rsqrtf(1.0f);
+    scalblnf(0.0f, 1);
+    scalbnf(0.0f, 1);
+    std::signbit(1.0f);
+    #ifdef __unix__
+    sincosf(0.0f, &fX, &fY);
+    #endif
+    sinf(0.0f);
+    sinhf(0.0f);
+    sqrtf(0.0f);
+    tanf(0.0f);
+    tanhf(0.0f);
+    tgammaf(2.0f);
+    truncf(0.0f);
+    #ifdef __unix__
+    y0f(1.0f);
+    y1f(1.0f);
+    ynf(1, 1.0f);
+    #endif
+}
+
+TEST_CASE("Unit_SinglePrecisionMathHost") {
+  single_precision_math_functions();
+}
diff --git a/projects/hip-tests/catch/unit/deviceLib/hipStdComplex.cc b/projects/hip-tests/catch/unit/deviceLib/hipStdComplex.cc
index c0bafae007..1b8db8658c 100644
--- a/projects/hip-tests/catch/unit/deviceLib/hipStdComplex.cc
+++ b/projects/hip-tests/catch/unit/deviceLib/hipStdComplex.cc
@@ -1,153 +1,153 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
-IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_common.hh>
-#include <hip_test_checkers.hh>
-#include <complex>
-
-#pragma clang diagnostic ignored "-Wunused-variable"
-// Tolerance for error
-const double tolerance = 1e-6;
-
-#define LEN 64
-
-#define ALL_FUN \
-  OP(add) \
-  OP(sub) \
-  OP(mul) \
-  OP(div) \
-  OP(abs) \
-  OP(arg) \
-  OP(sin) \
-  OP(cos)
-
-#define OP(x) CK_##x,
-enum CalcKind {
-  ALL_FUN
-};
-#undef OP
-
-#define OP(x) case CK_##x: return #x;
-std::string getName(enum CalcKind CK) {
-  switch (CK) {
-  ALL_FUN
-  }
-  return "";  // To prevent compile warning
-}
-#undef OP
-
-// Calculates function.
-// If the function has one argument, B is ignored.
-// If the function returns real number, converts it to a complex number.
-#define ONE_ARG(func) \
-  case CK_##func: \
-    return std::complex<FloatT>(func(A));
-
-template<typename FloatT>
-__device__ __host__ std::complex<FloatT> calc(std::complex<FloatT> A,
-                                        std::complex<FloatT> B,
-                                        enum CalcKind CK) {
-  switch (CK) {
-  case CK_add:
-    return A + B;
-  case CK_sub:
-    return A - B;
-  case CK_mul:
-    return A * B;
-  case CK_div:
-    return A / B;
-
-    ONE_ARG(abs)
-    ONE_ARG(arg)
-    ONE_ARG(sin)
-    ONE_ARG(cos)
-  }
-  return A;  // To prevent compile warning
-}
-
-template<typename FloatT>
-__global__ void kernel(std::complex<FloatT>* A,
-                       std::complex<FloatT>* B, std::complex<FloatT>* C,
-                       enum CalcKind CK) {
-    int tx = threadIdx.x + blockIdx.x * blockDim.x;
-    C[tx] = calc<FloatT>(A[tx], B[tx], CK);
-}
-
-template<typename FloatT>
-void test() {
-    typedef std::complex<FloatT> ComplexT;
-
-    ComplexT *A, *Ad, *B, *Bd, *C, *Cd, *D;
-    A = new ComplexT[LEN];
-    B = new ComplexT[LEN];
-    C = new ComplexT[LEN];
-    D = new ComplexT[LEN];
-    HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), sizeof(ComplexT)*LEN));
-    HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Bd), sizeof(ComplexT)*LEN));
-    HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Cd), sizeof(ComplexT)*LEN));
-
-    for (uint32_t i = 0; i < LEN; i++) {
-        A[i] = ComplexT((i + 1) * 1.0f, (i + 2) * 1.0f);
-        B[i] = A[i];
-        C[i] = A[i];
-    }
-    HIP_CHECK(hipMemcpy(Ad, A, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice));
-    HIP_CHECK(hipMemcpy(Bd, B, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice));
-
-    // Run kernel for a calculation kind and verify by comparing with host
-    // calculation result. Returns false if fails.
-    auto test_fun = [&](enum CalcKind CK) {
-      hipLaunchKernelGGL(kernel<FloatT>, dim3(1), dim3(LEN), 0, 0,
-                                                   Ad, Bd, Cd, CK);
-      HIP_CHECK(hipMemcpy(C, Cd, sizeof(ComplexT)*LEN, hipMemcpyDeviceToHost));
-      bool pass = true;
-      for (int i = 0; i < LEN; i++) {
-        ComplexT Expected = calc(A[i], B[i], CK);
-        FloatT error = abs(C[i] - Expected);
-        if (abs(Expected) > tolerance)
-          error /= abs(Expected);
-        pass &= error < tolerance;
-      }
-      return pass;
-    };
-
-#define OP(x) assert(test_fun(CK_##x));
-    ALL_FUN
-#undef OP
-
-    HIP_CHECK(hipFree(Ad));
-    HIP_CHECK(hipFree(Bd));
-    HIP_CHECK(hipFree(Cd));
-    delete[] A;
-    delete[] B;
-    delete[] C;
-    delete[] D;
-}
-
-#if HT_AMD
-TEST_CASE("Unit_StdComplex") {
-  SECTION("Test run with float") {
-  test<float>();
-  }
-  SECTION("Test run with double") {
-  test<double>();
-  }
-}
-#endif
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
+IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_common.hh>
+#include <hip_test_checkers.hh>
+#include <complex>
+
+#pragma clang diagnostic ignored "-Wunused-variable"
+// Tolerance for error
+const double tolerance = 1e-6;
+
+#define LEN 64
+
+#define ALL_FUN \
+  OP(add) \
+  OP(sub) \
+  OP(mul) \
+  OP(div) \
+  OP(abs) \
+  OP(arg) \
+  OP(sin) \
+  OP(cos)
+
+#define OP(x) CK_##x,
+enum CalcKind {
+  ALL_FUN
+};
+#undef OP
+
+#define OP(x) case CK_##x: return #x;
+std::string getName(enum CalcKind CK) {
+  switch (CK) {
+  ALL_FUN
+  }
+  return "";  // To prevent compile warning
+}
+#undef OP
+
+// Calculates function.
+// If the function has one argument, B is ignored.
+// If the function returns real number, converts it to a complex number.
+#define ONE_ARG(func) \
+  case CK_##func: \
+    return std::complex<FloatT>(func(A));
+
+template<typename FloatT>
+__device__ __host__ std::complex<FloatT> calc(std::complex<FloatT> A,
+                                        std::complex<FloatT> B,
+                                        enum CalcKind CK) {
+  switch (CK) {
+  case CK_add:
+    return A + B;
+  case CK_sub:
+    return A - B;
+  case CK_mul:
+    return A * B;
+  case CK_div:
+    return A / B;
+
+    ONE_ARG(abs)
+    ONE_ARG(arg)
+    ONE_ARG(sin)
+    ONE_ARG(cos)
+  }
+  return A;  // To prevent compile warning
+}
+
+template<typename FloatT>
+__global__ void kernel(std::complex<FloatT>* A,
+                       std::complex<FloatT>* B, std::complex<FloatT>* C,
+                       enum CalcKind CK) {
+    int tx = threadIdx.x + blockIdx.x * blockDim.x;
+    C[tx] = calc<FloatT>(A[tx], B[tx], CK);
+}
+
+template<typename FloatT>
+void test() {
+    typedef std::complex<FloatT> ComplexT;
+
+    ComplexT *A, *Ad, *B, *Bd, *C, *Cd, *D;
+    A = new ComplexT[LEN];
+    B = new ComplexT[LEN];
+    C = new ComplexT[LEN];
+    D = new ComplexT[LEN];
+    HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), sizeof(ComplexT)*LEN));
+    HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Bd), sizeof(ComplexT)*LEN));
+    HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Cd), sizeof(ComplexT)*LEN));
+
+    for (uint32_t i = 0; i < LEN; i++) {
+        A[i] = ComplexT((i + 1) * 1.0f, (i + 2) * 1.0f);
+        B[i] = A[i];
+        C[i] = A[i];
+    }
+    HIP_CHECK(hipMemcpy(Ad, A, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice));
+    HIP_CHECK(hipMemcpy(Bd, B, sizeof(ComplexT)*LEN, hipMemcpyHostToDevice));
+
+    // Run kernel for a calculation kind and verify by comparing with host
+    // calculation result. Returns false if fails.
+    auto test_fun = [&](enum CalcKind CK) {
+      hipLaunchKernelGGL(kernel<FloatT>, dim3(1), dim3(LEN), 0, 0,
+                                                   Ad, Bd, Cd, CK);
+      HIP_CHECK(hipMemcpy(C, Cd, sizeof(ComplexT)*LEN, hipMemcpyDeviceToHost));
+      bool pass = true;
+      for (int i = 0; i < LEN; i++) {
+        ComplexT Expected = calc(A[i], B[i], CK);
+        FloatT error = abs(C[i] - Expected);
+        if (abs(Expected) > tolerance)
+          error /= abs(Expected);
+        pass &= error < tolerance;
+      }
+      return pass;
+    };
+
+#define OP(x) assert(test_fun(CK_##x));
+    ALL_FUN
+#undef OP
+
+    HIP_CHECK(hipFree(Ad));
+    HIP_CHECK(hipFree(Bd));
+    HIP_CHECK(hipFree(Cd));
+    delete[] A;
+    delete[] B;
+    delete[] C;
+    delete[] D;
+}
+
+#if HT_AMD
+TEST_CASE("Unit_StdComplex") {
+  SECTION("Test run with float") {
+  test<float>();
+  }
+  SECTION("Test run with double") {
+  test<double>();
+  }
+}
+#endif
diff --git a/projects/hip-tests/catch/unit/deviceLib/hipTestAtomicAdd.cc b/projects/hip-tests/catch/unit/deviceLib/hipTestAtomicAdd.cc
index c728d85b18..3719a3560c 100644
--- a/projects/hip-tests/catch/unit/deviceLib/hipTestAtomicAdd.cc
+++ b/projects/hip-tests/catch/unit/deviceLib/hipTestAtomicAdd.cc
@@ -1,220 +1,220 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/**
-Testcase Scenarios :
- (TestCase 1)::
- 1) Execute atomicAdd in multi threaded scenario by diverging the data across
- multiple threads and validate the output at the end of all operations.
- 2) Execute atomicAddNoRet in multi threaded scenario by diverging the data
- across multiple threads and validate the output at the end of all operations.
- (TestCase 2)::
- 3) Execute atomicAdd API and validate the result.
- 4) Execute atomicAddNoRet API and validate the result.
- (TestCase 3)::
- 5) atomicadd/NoRet negative scenarios (TBD).
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_common.hh>
-#include <hip_test_checkers.hh>
-/*
- * Defines initial and increment values
- */
-#define INCREMENT_VALUE 10
-#define INT_INITIAL_VALUE 10
-#define FLOAT_INITIAL_VALUE 10.50
-#define DOUBLE_INITIAL_VALUE 200.12
-#define LONG_INITIAL_VALUE 10000
-#define UNSIGNED_INITIAL_VALUE 20
-
-#if HT_NVIDIA
-// atomicAddNoRet is unavailable in cuda
-template <typename T>
-__device__ void atomicAddNoRet(T* x, int y) {
-  atomicAdd(x, static_cast<T>(y));
-}
-#endif
-
-bool p_atomicNoRet = false;
-
-template <typename T>
-__global__ void atomicnoret_manywaves(T* C_d) {
-  atomicAddNoRet(C_d, INCREMENT_VALUE);
-}
-
-template <typename T>
-__global__ void atomic_manywaves(T* C_d) {
-  atomicAdd(C_d, INCREMENT_VALUE);
-}
-
-template <typename T>
-__global__ void atomicnoret_simple(T* C_d) {
-  atomicAddNoRet(C_d, INCREMENT_VALUE);
-}
-
-template <typename T>
-__global__ void atomic_simple(T* C_d) {
-  atomicAdd(C_d, INCREMENT_VALUE);
-}
-
-template <typename T>
-bool atomictest_manywaves(const T& initial_val) {
-  unsigned int ThreadsperBlock = 10;
-  unsigned int numBlocks = 1;
-  T memSize = sizeof(T);
-  T* hOData = reinterpret_cast<T*>(malloc(memSize));
-  *hOData = initial_val;
-  T* dOData;
-  HIP_CHECK(hipMalloc(&dOData, memSize));
-  // copy host memory to device to initialize to zero
-  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
-
-  // execute the kernel
-  hipLaunchKernelGGL(atomic_manywaves, dim3(numBlocks),
-      dim3(ThreadsperBlock), 0, 0, dOData);
-
-  // Copy result from device to host
-  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
-  REQUIRE(hOData[0] == initial_val+
-                      static_cast<T>(INCREMENT_VALUE*(ThreadsperBlock*numBlocks)));
-
-  // Cleanup memory
-  free(hOData);
-  HIP_CHECK(hipFree(dOData));
-
-  return true;
-}
-
-template <typename T>
-bool atomictestnoret_manywaves(const T& initial_val) {
-  unsigned int ThreadsperBlock = 10;
-  unsigned int numBlocks = 1;
-  T memSize = sizeof(T);
-  T* hOData = reinterpret_cast<T*>(malloc(memSize));
-  *hOData = initial_val;
-  T* dOData;
-  HIP_CHECK(hipMalloc(&dOData, memSize));
-  // copy host memory to device to initialize to zero
-  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
-
-  // execute the kernel
-  hipLaunchKernelGGL(atomicnoret_manywaves, dim3(numBlocks),
-      dim3(ThreadsperBlock), 0, 0, dOData);
-
-  // Copy result from device to host
-  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
-  REQUIRE(hOData[0] == initial_val+
-                       (INCREMENT_VALUE*(ThreadsperBlock*numBlocks)));
-
-  // Cleanup memory
-  free(hOData);
-  HIP_CHECK(hipFree(dOData));
-
-  return true;
-}
-
-template <typename T>
-bool atomictest_simple(const T& initial_val) {
-  unsigned int ThreadsperBlock = 1;
-  unsigned int numBlocks = 1;
-  T memSize = sizeof(T);
-  T* hOData = reinterpret_cast<T*>(malloc(memSize));
-  *hOData = initial_val;
-  T* dOData;
-  HIP_CHECK(hipMalloc(&dOData, memSize));
-  // copy host memory to device to initialize to zero
-  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
-
-  // execute the kernel
-  hipLaunchKernelGGL(atomic_simple, dim3(numBlocks),
-      dim3(ThreadsperBlock), 0, 0, dOData);
-
-  // Copy result from device to host
-  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
-  REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE);
-
-  // Cleanup memory
-  free(hOData);
-  HIP_CHECK(hipFree(dOData));
-
-  return true;
-}
-
-template <typename T>
-bool atomictestnoret_simple(const T& initial_val) {
-  unsigned int ThreadsperBlock = 1;
-  unsigned int numBlocks = 1;
-  T memSize = sizeof(T);
-  T* hOData = reinterpret_cast<T*>(malloc(memSize));
-  *hOData = initial_val;
-  T* dOData;
-  HIP_CHECK(hipMalloc(&dOData, memSize));
-  // copy host memory to device to initialize to zero
-  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
-
-  // execute the kernel
-  hipLaunchKernelGGL(atomicnoret_simple, dim3(numBlocks),
-      dim3(ThreadsperBlock), 0, 0, dOData);
-
-  // Copy result from device to host
-  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
-  REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE);
-
-  // Cleanup memory
-  free(hOData);
-  HIP_CHECK(hipFree(dOData));
-
-  return true;
-}
-
-TEST_CASE("Unit_hipTestAtomicAdd") {
-  bool TestPassed = true;
-
-  SECTION("atomic tests with many waves") {
-    REQUIRE(TestPassed == atomictest_manywaves<int>(INT_INITIAL_VALUE));
-    REQUIRE(TestPassed ==
-            atomictest_manywaves<unsigned int>(UNSIGNED_INITIAL_VALUE));
-    REQUIRE(TestPassed == atomictest_manywaves<float>(FLOAT_INITIAL_VALUE));
-    #if HT_AMD
-    REQUIRE(TestPassed ==
-        atomictest_manywaves<uint64_t>(LONG_INITIAL_VALUE));
-    REQUIRE(TestPassed ==
-          atomictest_manywaves<double>(DOUBLE_INITIAL_VALUE));
-    #endif
-  }
-  SECTION("atomic tests with many waves and no return") {
-    REQUIRE(TestPassed ==
-            atomictestnoret_manywaves<float>(FLOAT_INITIAL_VALUE));
-  }
-  SECTION("simple atomic tests") {
-    REQUIRE(TestPassed == atomictest_simple<int>(INT_INITIAL_VALUE));
-    REQUIRE(TestPassed ==
-            atomictest_simple<unsigned int>(UNSIGNED_INITIAL_VALUE));
-    REQUIRE(TestPassed == atomictest_simple<float>(FLOAT_INITIAL_VALUE));
-    #if HT_AMD
-    REQUIRE(TestPassed ==
-            atomictest_simple<uint64_t>(LONG_INITIAL_VALUE));
-    REQUIRE(TestPassed == atomictest_simple<double>(DOUBLE_INITIAL_VALUE));
-    #endif
-  }
-  SECTION("Simple atomic test with no return") {
-    REQUIRE(TestPassed == atomictestnoret_simple<float>(FLOAT_INITIAL_VALUE));
-  }
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/**
+Testcase Scenarios :
+ (TestCase 1)::
+ 1) Execute atomicAdd in multi threaded scenario by diverging the data across
+ multiple threads and validate the output at the end of all operations.
+ 2) Execute atomicAddNoRet in multi threaded scenario by diverging the data
+ across multiple threads and validate the output at the end of all operations.
+ (TestCase 2)::
+ 3) Execute atomicAdd API and validate the result.
+ 4) Execute atomicAddNoRet API and validate the result.
+ (TestCase 3)::
+ 5) atomicadd/NoRet negative scenarios (TBD).
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_common.hh>
+#include <hip_test_checkers.hh>
+/*
+ * Defines initial and increment values
+ */
+#define INCREMENT_VALUE 10
+#define INT_INITIAL_VALUE 10
+#define FLOAT_INITIAL_VALUE 10.50
+#define DOUBLE_INITIAL_VALUE 200.12
+#define LONG_INITIAL_VALUE 10000
+#define UNSIGNED_INITIAL_VALUE 20
+
+#if HT_NVIDIA
+// atomicAddNoRet is unavailable in cuda
+template <typename T>
+__device__ void atomicAddNoRet(T* x, int y) {
+  atomicAdd(x, static_cast<T>(y));
+}
+#endif
+
+bool p_atomicNoRet = false;
+
+template <typename T>
+__global__ void atomicnoret_manywaves(T* C_d) {
+  atomicAddNoRet(C_d, INCREMENT_VALUE);
+}
+
+template <typename T>
+__global__ void atomic_manywaves(T* C_d) {
+  atomicAdd(C_d, INCREMENT_VALUE);
+}
+
+template <typename T>
+__global__ void atomicnoret_simple(T* C_d) {
+  atomicAddNoRet(C_d, INCREMENT_VALUE);
+}
+
+template <typename T>
+__global__ void atomic_simple(T* C_d) {
+  atomicAdd(C_d, INCREMENT_VALUE);
+}
+
+template <typename T>
+bool atomictest_manywaves(const T& initial_val) {
+  unsigned int ThreadsperBlock = 10;
+  unsigned int numBlocks = 1;
+  T memSize = sizeof(T);
+  T* hOData = reinterpret_cast<T*>(malloc(memSize));
+  *hOData = initial_val;
+  T* dOData;
+  HIP_CHECK(hipMalloc(&dOData, memSize));
+  // copy host memory to device to initialize to zero
+  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
+
+  // execute the kernel
+  hipLaunchKernelGGL(atomic_manywaves, dim3(numBlocks),
+      dim3(ThreadsperBlock), 0, 0, dOData);
+
+  // Copy result from device to host
+  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
+  REQUIRE(hOData[0] == initial_val+
+                      static_cast<T>(INCREMENT_VALUE*(ThreadsperBlock*numBlocks)));
+
+  // Cleanup memory
+  free(hOData);
+  HIP_CHECK(hipFree(dOData));
+
+  return true;
+}
+
+template <typename T>
+bool atomictestnoret_manywaves(const T& initial_val) {
+  unsigned int ThreadsperBlock = 10;
+  unsigned int numBlocks = 1;
+  T memSize = sizeof(T);
+  T* hOData = reinterpret_cast<T*>(malloc(memSize));
+  *hOData = initial_val;
+  T* dOData;
+  HIP_CHECK(hipMalloc(&dOData, memSize));
+  // copy host memory to device to initialize to zero
+  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
+
+  // execute the kernel
+  hipLaunchKernelGGL(atomicnoret_manywaves, dim3(numBlocks),
+      dim3(ThreadsperBlock), 0, 0, dOData);
+
+  // Copy result from device to host
+  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
+  REQUIRE(hOData[0] == initial_val+
+                       (INCREMENT_VALUE*(ThreadsperBlock*numBlocks)));
+
+  // Cleanup memory
+  free(hOData);
+  HIP_CHECK(hipFree(dOData));
+
+  return true;
+}
+
+template <typename T>
+bool atomictest_simple(const T& initial_val) {
+  unsigned int ThreadsperBlock = 1;
+  unsigned int numBlocks = 1;
+  T memSize = sizeof(T);
+  T* hOData = reinterpret_cast<T*>(malloc(memSize));
+  *hOData = initial_val;
+  T* dOData;
+  HIP_CHECK(hipMalloc(&dOData, memSize));
+  // copy host memory to device to initialize to zero
+  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
+
+  // execute the kernel
+  hipLaunchKernelGGL(atomic_simple, dim3(numBlocks),
+      dim3(ThreadsperBlock), 0, 0, dOData);
+
+  // Copy result from device to host
+  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
+  REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE);
+
+  // Cleanup memory
+  free(hOData);
+  HIP_CHECK(hipFree(dOData));
+
+  return true;
+}
+
+template <typename T>
+bool atomictestnoret_simple(const T& initial_val) {
+  unsigned int ThreadsperBlock = 1;
+  unsigned int numBlocks = 1;
+  T memSize = sizeof(T);
+  T* hOData = reinterpret_cast<T*>(malloc(memSize));
+  *hOData = initial_val;
+  T* dOData;
+  HIP_CHECK(hipMalloc(&dOData, memSize));
+  // copy host memory to device to initialize to zero
+  HIP_CHECK(hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice));
+
+  // execute the kernel
+  hipLaunchKernelGGL(atomicnoret_simple, dim3(numBlocks),
+      dim3(ThreadsperBlock), 0, 0, dOData);
+
+  // Copy result from device to host
+  HIP_CHECK(hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost));
+  REQUIRE(hOData[0] == initial_val+INCREMENT_VALUE);
+
+  // Cleanup memory
+  free(hOData);
+  HIP_CHECK(hipFree(dOData));
+
+  return true;
+}
+
+TEST_CASE("Unit_hipTestAtomicAdd") {
+  bool TestPassed = true;
+
+  SECTION("atomic tests with many waves") {
+    REQUIRE(TestPassed == atomictest_manywaves<int>(INT_INITIAL_VALUE));
+    REQUIRE(TestPassed ==
+            atomictest_manywaves<unsigned int>(UNSIGNED_INITIAL_VALUE));
+    REQUIRE(TestPassed == atomictest_manywaves<float>(FLOAT_INITIAL_VALUE));
+    #if HT_AMD
+    REQUIRE(TestPassed ==
+        atomictest_manywaves<uint64_t>(LONG_INITIAL_VALUE));
+    REQUIRE(TestPassed ==
+          atomictest_manywaves<double>(DOUBLE_INITIAL_VALUE));
+    #endif
+  }
+  SECTION("atomic tests with many waves and no return") {
+    REQUIRE(TestPassed ==
+            atomictestnoret_manywaves<float>(FLOAT_INITIAL_VALUE));
+  }
+  SECTION("simple atomic tests") {
+    REQUIRE(TestPassed == atomictest_simple<int>(INT_INITIAL_VALUE));
+    REQUIRE(TestPassed ==
+            atomictest_simple<unsigned int>(UNSIGNED_INITIAL_VALUE));
+    REQUIRE(TestPassed == atomictest_simple<float>(FLOAT_INITIAL_VALUE));
+    #if HT_AMD
+    REQUIRE(TestPassed ==
+            atomictest_simple<uint64_t>(LONG_INITIAL_VALUE));
+    REQUIRE(TestPassed == atomictest_simple<double>(DOUBLE_INITIAL_VALUE));
+    #endif
+  }
+  SECTION("Simple atomic test with no return") {
+    REQUIRE(TestPassed == atomictestnoret_simple<float>(FLOAT_INITIAL_VALUE));
+  }
+}
diff --git a/projects/hip-tests/catch/unit/deviceLib/hipTestClock.cc b/projects/hip-tests/catch/unit/deviceLib/hipTestClock.cc
index 26dd29c76c..5f7ad45008 100644
--- a/projects/hip-tests/catch/unit/deviceLib/hipTestClock.cc
+++ b/projects/hip-tests/catch/unit/deviceLib/hipTestClock.cc
@@ -1,51 +1,51 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_common.hh>
-#include <hip_test_checkers.hh>
-
-#define LEN 512
-#define SIZE (LEN * sizeof(int64_t))
-
-static __global__ void kernel1(int64_t* Ad) {
-  int tid = threadIdx.x + blockIdx.x * blockDim.x;
-  Ad[tid] = clock() + clock64() + __clock() + __clock64();
-}
-
-static __global__ void kernel2(int64_t* Ad) {
-  int tid = threadIdx.x + blockIdx.x * blockDim.x;
-  Ad[tid] = clock() + clock64() + __clock() + __clock64() - Ad[tid];
-}
-
-TEST_CASE("Unit_hipTestClock") {
-  int64_t *A, *Ad;
-  A = new int64_t[LEN];
-  for (unsigned i = 0; i < LEN; i++) {
-      A[i] = 0;
-  }
-  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), SIZE));
-  HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
-  hipLaunchKernelGGL(kernel1, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
-  hipLaunchKernelGGL(kernel2, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
-  HIP_CHECK(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));
-  for (unsigned i = 0; i < LEN; i++) {
-    assert(0 != A[i]);
-  }
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_common.hh>
+#include <hip_test_checkers.hh>
+
+#define LEN 512
+#define SIZE (LEN * sizeof(int64_t))
+
+static __global__ void kernel1(int64_t* Ad) {
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  Ad[tid] = clock() + clock64() + __clock() + __clock64();
+}
+
+static __global__ void kernel2(int64_t* Ad) {
+  int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  Ad[tid] = clock() + clock64() + __clock() + __clock64() - Ad[tid];
+}
+
+TEST_CASE("Unit_hipTestClock") {
+  int64_t *A, *Ad;
+  A = new int64_t[LEN];
+  for (unsigned i = 0; i < LEN; i++) {
+      A[i] = 0;
+  }
+  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), SIZE));
+  HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
+  hipLaunchKernelGGL(kernel1, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
+  hipLaunchKernelGGL(kernel2, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
+  HIP_CHECK(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));
+  for (unsigned i = 0; i < LEN; i++) {
+    assert(0 != A[i]);
+  }
+}
diff --git a/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorName.cc b/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorName.cc
index bae42209ef..10a0797f87 100644
--- a/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorName.cc
+++ b/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorName.cc
@@ -1,88 +1,88 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_common.hh>
-
-#include "error_handling_common.hh"
-
-/**
- * @addtogroup hipDrvGetErrorName hipDrvGetErrorName
- * @{
- * @ingroup ErrorTest
- * `hipDrvGetErrorName(hipError_t hip_error)` -
- * Return hip error as text string form.
- */
-
-/**
- * Test Description
- * ------------------------
- *  - Validate that the correct string is returned for each supported
- *    device error enumeration.
- * Test source
- * ------------------------
- *  - unit/errorHandling/hipDrvGetErrorName.cc
- * Test requirements
- * ------------------------
- *  - HIP_VERSION >= 5.4
- */
-TEST_CASE("Unit_hipDrvGetErrorName_Positive_Basic") {
-  const char* error_string = nullptr;
-  const auto enumerator =
-      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
-  INFO("Error: " << enumerator);
-
-  HIP_CHECK(hipDrvGetErrorName(enumerator, &error_string));
-
-  REQUIRE(error_string != nullptr);
-  REQUIRE(strcmp(error_string, ErrorName(enumerator)) == 0);
-}
-
-/**
- * Test Description
- * ------------------------
- *  - Validate handling of invalid arguments:
- *    -# When error enumerator is invalid (-1)
- *      - AMD expected output: return "hipErrorUnknown"
- *      - NVIDIA expected output: return "cudaErrorUnknown"
- *    -# When nullptr is passed as store location
- *      - Expected output: return "hipErrorInvalidValue"
- * Test source
- * ------------------------
- *  - unit/errorHandling/hipDrvGetErrorName.cc
- * Test requirements
- * ------------------------
- *  - HIP_VERSION >= 5.4
- */
-TEST_CASE("Unit_hipDrvGetErrorName_Negative_Parameters") {
-  const char* error_string = nullptr;
-  SECTION("pass unknown value to hipError") {
-    HIP_CHECK_ERROR((hipDrvGetErrorName(static_cast<hipError_t>(-1), &error_string)),
-                    hipErrorInvalidValue);
-  }
-#if HT_AMD  // segfaults on NVIDIA
-  SECTION("pass nullptr to error string") {
-    HIP_CHECK_ERROR((hipDrvGetErrorString(hipErrorInvalidValue, nullptr)), hipErrorInvalidValue);
-  }
-#endif
-}
-
-/**
-* End doxygen group ErrorTest.
-* @}
-*/
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+
+#include "error_handling_common.hh"
+
+/**
+ * @addtogroup hipDrvGetErrorName hipDrvGetErrorName
+ * @{
+ * @ingroup ErrorTest
+ * `hipDrvGetErrorName(hipError_t hip_error)` -
+ * Return hip error as text string form.
+ */
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Validate that the correct string is returned for each supported
+ *    device error enumeration.
+ * Test source
+ * ------------------------
+ *  - unit/errorHandling/hipDrvGetErrorName.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.4
+ */
+TEST_CASE("Unit_hipDrvGetErrorName_Positive_Basic") {
+  const char* error_string = nullptr;
+  const auto enumerator =
+      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
+  INFO("Error: " << enumerator);
+
+  HIP_CHECK(hipDrvGetErrorName(enumerator, &error_string));
+
+  REQUIRE(error_string != nullptr);
+  REQUIRE(strcmp(error_string, ErrorName(enumerator)) == 0);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Validate handling of invalid arguments:
+ *    -# When error enumerator is invalid (-1)
+ *      - AMD expected output: return "hipErrorUnknown"
+ *      - NVIDIA expected output: return "cudaErrorUnknown"
+ *    -# When nullptr is passed as store location
+ *      - Expected output: return "hipErrorInvalidValue"
+ * Test source
+ * ------------------------
+ *  - unit/errorHandling/hipDrvGetErrorName.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.4
+ */
+TEST_CASE("Unit_hipDrvGetErrorName_Negative_Parameters") {
+  const char* error_string = nullptr;
+  SECTION("pass unknown value to hipError") {
+    HIP_CHECK_ERROR((hipDrvGetErrorName(static_cast<hipError_t>(-1), &error_string)),
+                    hipErrorInvalidValue);
+  }
+#if HT_AMD  // segfaults on NVIDIA
+  SECTION("pass nullptr to error string") {
+    HIP_CHECK_ERROR((hipDrvGetErrorString(hipErrorInvalidValue, nullptr)), hipErrorInvalidValue);
+  }
+#endif
+}
+
+/**
+* End doxygen group ErrorTest.
+* @}
+*/
diff --git a/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorString.cc b/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorString.cc
index e81ced683a..04f1a833a1 100644
--- a/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorString.cc
+++ b/projects/hip-tests/catch/unit/errorHandling/hipDrvGetErrorString.cc
@@ -1,88 +1,88 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_common.hh>
-
-#include "error_handling_common.hh"
-
-/**
- * @addtogroup hipDrvGetErrorString hipDrvGetErrorString
- * @{
- * @ingroup ErrorTest
- * `hipDrvGetErrorString(hipError_t hipError)` -
- * Return handy text string message to explain the error which occurred.
- */
-
-/**
- * Test Description
- * ------------------------
- *  - Validate that the correct string is returned for each supported
- *    device error enumeration.
- * Test source
- * ------------------------
- *  - unit/errorHandling/hipDrvGetErrorString.cc
- * Test requirements
- * ------------------------
- *  - HIP_VERSION >= 5.4
- */
-TEST_CASE("Unit_hipDrvGetErrorString_Positive_Basic") {
-  const char* error_string = nullptr;
-  const auto enumerator =
-      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
-  INFO("Error: " << enumerator);
-
-  HIP_CHECK(hipDrvGetErrorString(enumerator, &error_string));
-
-  REQUIRE(error_string != nullptr);
-  REQUIRE(strcmp(error_string, ErrorString(enumerator)) == 0);
-}
-
-/**
- * Test Description
- * ------------------------
- *  - Validate handling of invalid arguments:
- *    -# When error enumerator is invalid (-1)
- *      - Expected output: return "hipErrorInvalidValue"
- *    -# When nullptr is passed as store location
- *      - Expected output: return "hipErrorInvalidValue"
- * Test source
- * ------------------------
- *  - unit/errorHandling/hipDrvGetErrorString.cc
- * Test requirements
- * ------------------------
- *  - HIP_VERSION >= 5.4
- */
-TEST_CASE("Unit_hipDrvGetErrorString_Negative_Parameters") {
-  const char* error_string = nullptr;
-  SECTION("pass unknown value to hipError") {
-    HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(-1), &error_string)),
-                    hipErrorInvalidValue);
-  }
-#if HT_AMD  // segfaults on NVIDIA
-  SECTION("pass nullptr to error string") {
-    HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(0), nullptr)),
-                    hipErrorInvalidValue);
-  }
-#endif
-}
-
-/**
-* End doxygen group ErrorTest.
-* @}
-*/
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_common.hh>
+
+#include "error_handling_common.hh"
+
+/**
+ * @addtogroup hipDrvGetErrorString hipDrvGetErrorString
+ * @{
+ * @ingroup ErrorTest
+ * `hipDrvGetErrorString(hipError_t hipError)` -
+ * Return handy text string message to explain the error which occurred.
+ */
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Validate that the correct string is returned for each supported
+ *    device error enumeration.
+ * Test source
+ * ------------------------
+ *  - unit/errorHandling/hipDrvGetErrorString.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.4
+ */
+TEST_CASE("Unit_hipDrvGetErrorString_Positive_Basic") {
+  const char* error_string = nullptr;
+  const auto enumerator =
+      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
+  INFO("Error: " << enumerator);
+
+  HIP_CHECK(hipDrvGetErrorString(enumerator, &error_string));
+
+  REQUIRE(error_string != nullptr);
+  REQUIRE(strcmp(error_string, ErrorString(enumerator)) == 0);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Validate handling of invalid arguments:
+ *    -# When error enumerator is invalid (-1)
+ *      - Expected output: return "hipErrorInvalidValue"
+ *    -# When nullptr is passed as store location
+ *      - Expected output: return "hipErrorInvalidValue"
+ * Test source
+ * ------------------------
+ *  - unit/errorHandling/hipDrvGetErrorString.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 5.4
+ */
+TEST_CASE("Unit_hipDrvGetErrorString_Negative_Parameters") {
+  const char* error_string = nullptr;
+  SECTION("pass unknown value to hipError") {
+    HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(-1), &error_string)),
+                    hipErrorInvalidValue);
+  }
+#if HT_AMD  // segfaults on NVIDIA
+  SECTION("pass nullptr to error string") {
+    HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast<hipError_t>(0), nullptr)),
+                    hipErrorInvalidValue);
+  }
+#endif
+}
+
+/**
+* End doxygen group ErrorTest.
+* @}
+*/
diff --git a/projects/hip-tests/catch/unit/g++/CMakeLists.txt b/projects/hip-tests/catch/unit/g++/CMakeLists.txt
index 5adf876616..ac33b11b9b 100644
--- a/projects/hip-tests/catch/unit/g++/CMakeLists.txt
+++ b/projects/hip-tests/catch/unit/g++/CMakeLists.txt
@@ -1,19 +1,19 @@
-# AMD specific test
-if(HIP_PLATFORM MATCHES "amd")
-if(UNIX)
-set(TEST_SRC
-    hipMalloc.cc
-)
-# Creating Custom object file
-add_custom_target(malloc_custom COMMAND g++ -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.cpp -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o malloc.o BYPRODUCTS malloc.o)
-add_library(malloc_gpp OBJECT IMPORTED)
-set_property(TARGET malloc_gpp PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/malloc.o")
-
-hip_add_exe_to_target(NAME gppTests
-                      TEST_SRC ${TEST_SRC}
-                      TEST_TARGET_NAME build_tests
-                      LINKER_LIBS malloc_gpp)
-
-add_dependencies(gppTests malloc_custom)
-endif()
-endif()
+# AMD specific test
+if(HIP_PLATFORM MATCHES "amd")
+if(UNIX)
+set(TEST_SRC
+    hipMalloc.cc
+)
+# Creating Custom object file
+add_custom_target(malloc_custom COMMAND g++ -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.cpp -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o malloc.o BYPRODUCTS malloc.o)
+add_library(malloc_gpp OBJECT IMPORTED)
+set_property(TARGET malloc_gpp PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/malloc.o")
+
+hip_add_exe_to_target(NAME gppTests
+                      TEST_SRC ${TEST_SRC}
+                      TEST_TARGET_NAME build_tests
+                      LINKER_LIBS malloc_gpp)
+
+add_dependencies(gppTests malloc_custom)
+endif()
+endif()
diff --git a/projects/hip-tests/catch/unit/g++/hipMalloc.cc b/projects/hip-tests/catch/unit/g++/hipMalloc.cc
index 37a66e22c8..e0cd60306b 100644
--- a/projects/hip-tests/catch/unit/g++/hipMalloc.cc
+++ b/projects/hip-tests/catch/unit/g++/hipMalloc.cc
@@ -1,54 +1,54 @@
-/*
- * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- * */
-
-#include <hip_test_common.hh>
- 
-#include "hipMalloc.h"
-/**
- * @addtogroup hipMalloc hipMalloc
- * @{
- * @ingroup MemoryTest
- * `hipError_t hipMalloc(void** ptr, size_t size)` -
- * Allocate memory on the default accelerator.
- * @}
- */
-
-/**
- * Test Description
- * ------------------------
- *    - Allocate memory by using hipMalloc API and verify hipSuccess is returned.
-
- * Test source
- * ------------------------
- *    - catch/unit/g++/hipMalloc.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.6
- */
-
-TEST_CASE("Unit_hipMalloc_gpptest") {
-  printf("calling cpp function from here\n");
-  int result = MallocFunc();
-  REQUIRE(result == 1);
-}
-
-/**
-* End doxygen group MemoryTest.
-* @}
-*/
+/*
+ * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ * */
+
+#include <hip_test_common.hh>
+ 
+#include "hipMalloc.h"
+/**
+ * @addtogroup hipMalloc hipMalloc
+ * @{
+ * @ingroup MemoryTest
+ * `hipError_t hipMalloc(void** ptr, size_t size)` -
+ * Allocate memory on the default accelerator.
+ * @}
+ */
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Allocate memory by using hipMalloc API and verify hipSuccess is returned.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/g++/hipMalloc.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.6
+ */
+
+TEST_CASE("Unit_hipMalloc_gpptest") {
+  printf("calling cpp function from here\n");
+  int result = MallocFunc();
+  REQUIRE(result == 1);
+}
+
+/**
+* End doxygen group MemoryTest.
+* @}
+*/
diff --git a/projects/hip-tests/catch/unit/g++/hipMalloc.h b/projects/hip-tests/catch/unit/g++/hipMalloc.h
index 458e489c5a..9dee5043ec 100644
--- a/projects/hip-tests/catch/unit/g++/hipMalloc.h
+++ b/projects/hip-tests/catch/unit/g++/hipMalloc.h
@@ -1,22 +1,22 @@
-/*
- * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- * */
-
-#include <iostream>
-
+/*
+ * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ * */
+
+#include <iostream>
+
 extern int MallocFunc();
\ No newline at end of file
diff --git a/projects/hip-tests/catch/unit/gcc/CMakeLists.txt b/projects/hip-tests/catch/unit/gcc/CMakeLists.txt
index 90e9c2db65..5c8385cee8 100644
--- a/projects/hip-tests/catch/unit/gcc/CMakeLists.txt
+++ b/projects/hip-tests/catch/unit/gcc/CMakeLists.txt
@@ -1,28 +1,28 @@
-# Common Tests - Test independent of all platforms
-if(HIP_PLATFORM MATCHES "amd")
-if(UNIX)
-set(TEST_SRC
-    gccTest.cc
-    gpu.cpp
-)
-# Creating Custom object file
-add_custom_command(OUTPUT LaunchKernel.o COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/LaunchKernel.c -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o LaunchKernel.o)
-add_custom_target(LaunchKernel_custom DEPENDS LaunchKernel.o)
-add_custom_command(OUTPUT hipMalloc.o COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.c -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o hipMalloc.o)
-add_custom_target(hipMalloc_custom DEPENDS hipMalloc.o)
-
-add_library(LaunchKernel_lib OBJECT IMPORTED)
-add_library(hipMalloc_lib OBJECT IMPORTED)
-
-set_property(TARGET LaunchKernel_lib PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/LaunchKernel.o")
-set_property(TARGET hipMalloc_lib PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/hipMalloc.o")
-
-
-hip_add_exe_to_target(NAME gccTests
-                      TEST_SRC ${TEST_SRC}
-                      TEST_TARGET_NAME build_tests
-		              LINKER_LIBS LaunchKernel_lib hipMalloc_lib)
-
-add_dependencies(gccTests LaunchKernel_custom hipMalloc_custom)
-endif()
-endif()
+# Common Tests - Test independent of all platforms
+if(HIP_PLATFORM MATCHES "amd")
+if(UNIX)
+set(TEST_SRC
+    gccTest.cc
+    gpu.cpp
+)
+# Creating Custom object file
+add_custom_command(OUTPUT LaunchKernel.o COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/LaunchKernel.c -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o LaunchKernel.o)
+add_custom_target(LaunchKernel_custom DEPENDS LaunchKernel.o)
+add_custom_command(OUTPUT hipMalloc.o COMMAND gcc -c ${CMAKE_CURRENT_SOURCE_DIR}/hipMalloc.c -I${HIP_PATH}/include -D__HIP_PLATFORM_AMD__ -o hipMalloc.o)
+add_custom_target(hipMalloc_custom DEPENDS hipMalloc.o)
+
+add_library(LaunchKernel_lib OBJECT IMPORTED)
+add_library(hipMalloc_lib OBJECT IMPORTED)
+
+set_property(TARGET LaunchKernel_lib PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/LaunchKernel.o")
+set_property(TARGET hipMalloc_lib PROPERTY IMPORTED_OBJECTS "${CMAKE_CURRENT_BINARY_DIR}/hipMalloc.o")
+
+
+hip_add_exe_to_target(NAME gccTests
+                      TEST_SRC ${TEST_SRC}
+                      TEST_TARGET_NAME build_tests
+		              LINKER_LIBS LaunchKernel_lib hipMalloc_lib)
+
+add_dependencies(gccTests LaunchKernel_custom hipMalloc_custom)
+endif()
+endif()
diff --git a/projects/hip-tests/catch/unit/gcc/gccTest.cc b/projects/hip-tests/catch/unit/gcc/gccTest.cc
index 6332540682..f520419c75 100644
--- a/projects/hip-tests/catch/unit/gcc/gccTest.cc
+++ b/projects/hip-tests/catch/unit/gcc/gccTest.cc
@@ -1,64 +1,64 @@
-/*
- * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- * */
-
-#include <hip_test_common.hh>
- 
-extern "C" {
-#include "LaunchKernel.h"
-}
-
-/**
- * Test Description
- * ------------------------
- *    - calling launchKernel which is c function from catch2
- * and compile with gcc compiler and verify the results.
-
- * Test source
- * ------------------------
- *    - catch/unit/gcc/gccTest.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.6
- */
-
-TEST_CASE("Unit_LaunchKernelgccTests") {
-  printf("Calling launchKernel files from here\n");
-  int result = launchKernel();
-  REQUIRE(result == 1);
-}
-
-/**
- * Test Description
- * ------------------------
- *    - Calling hipMalloc which is c file from catch2 and compile
- * with gcc compiler and verify the results.
-
- * Test source
- * ------------------------
- *    - catch/unit/gcc/gccTest.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.6
- */
-
-TEST_CASE("Unit_hipMallocgccTests") {
-  printf("Calling hipMalloc files from here\n");
-  int result = hipMallocfunc();
-  REQUIRE(result == 1);
-}
+/*
+ * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ * */
+
+#include <hip_test_common.hh>
+ 
+extern "C" {
+#include "LaunchKernel.h"
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - calling launchKernel which is c function from catch2
+ * and compile with gcc compiler and verify the results.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/gcc/gccTest.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.6
+ */
+
+TEST_CASE("Unit_LaunchKernelgccTests") {
+  printf("Calling launchKernel files from here\n");
+  int result = launchKernel();
+  REQUIRE(result == 1);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Calling hipMalloc which is c file from catch2 and compile
+ * with gcc compiler and verify the results.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/gcc/gccTest.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.6
+ */
+
+TEST_CASE("Unit_hipMallocgccTests") {
+  printf("Calling hipMalloc files from here\n");
+  int result = hipMallocfunc();
+  REQUIRE(result == 1);
+}
diff --git a/projects/hip-tests/catch/unit/kernel/hipDynamicShared.cc b/projects/hip-tests/catch/unit/kernel/hipDynamicShared.cc
index e25938d51c..a96990890c 100644
--- a/projects/hip-tests/catch/unit/kernel/hipDynamicShared.cc
+++ b/projects/hip-tests/catch/unit/kernel/hipDynamicShared.cc
@@ -1,176 +1,176 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
- 
-
-#pragma clang diagnostic ignored "-Wunused-parameter"
-
-unsigned threadsPerBlock = 256;
-
-template <unsigned batch, typename T>
-__device__ void sum(T* sdata, unsigned groupElements, unsigned tid) {
-  T tmp;
-  if (groupElements < batch)
-      return;
-  // sdata[tid] += sdata[tid - batch/2] does not work when block size is
-  // greater than wave size because one wave may complete before another
-  // wave.
-  if (tid >= batch/2 && tid < groupElements)
-      tmp = sdata[tid - batch/2];
-  __syncthreads();
-  if (tid >= batch/2 && tid < groupElements)
-      sdata[tid] += tmp;
-  __syncthreads();
-}
-
-template <typename T>
-__global__ void testExternSharedKernel(const T* A_d, const T* B_d, T* C_d,
-                                    size_t numElements, size_t groupElements) {
-  // declare dynamic shared memory
-  extern __shared__ double sdata0[];
-  T* sdata = reinterpret_cast<T *>(sdata0);
-
-  size_t gid = (blockIdx.x * blockDim.x + threadIdx.x);
-  size_t tid = threadIdx.x;
-
-  // initialize dynamic shared memory
-  if (tid < groupElements) {
-      sdata[tid] = static_cast<T>(tid);
-  }
-  __syncthreads();
-
-  // prefix sum inside dynamic shared memory
-  sum<512>(sdata, groupElements, tid);
-  sum<256>(sdata, groupElements, tid);
-  sum<128>(sdata, groupElements, tid);
-  sum<64>(sdata, groupElements, tid);
-  sum<32>(sdata, groupElements, tid);
-  sum<16>(sdata, groupElements, tid);
-  sum<8>(sdata, groupElements, tid);
-  sum<4>(sdata, groupElements, tid);
-  sum<2>(sdata, groupElements, tid);
-  C_d[gid] = A_d[gid] + B_d[gid] + sdata[tid % groupElements];
-}
-
-template <typename T>
-void testExternShared(size_t N, unsigned groupElements) {
-  size_t Nbytes = N * sizeof(T);
-
-  T *A_d, *B_d, *C_d;
-  T *A_h, *B_h, *C_h;
-
-  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
-  unsigned blocks = N/threadsPerBlock;
-  assert(N == blocks * threadsPerBlock);
-
-  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
-  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
-
-  // calculate the amount of dynamic shared memory required
-  size_t groupMemBytes = groupElements * sizeof(T);
-
-  // launch kernel with dynamic shared memory
-  hipLaunchKernelGGL(HIP_KERNEL_NAME(testExternSharedKernel<T>), dim3(blocks),
-                     dim3(threadsPerBlock), groupMemBytes, 0, A_d, B_d, C_d,
-                     N, groupElements);
-
-  HIP_CHECK(hipDeviceSynchronize());
-  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
-
-  // verify
-  for (size_t i = 0; i < N; ++i) {
-    size_t tid = (i % min(threadsPerBlock, groupElements));
-    T sumFromSharedMemory = static_cast<T>(tid * (tid + 1) / 2);
-    T expected = A_h[i] + B_h[i] + sumFromSharedMemory;
-    REQUIRE(C_h[i] == expected);
-  }
-  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
-}
-
-/**
-* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
-* @{
-* @ingroup KernelTest
-* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
-   std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
-* Method to invocate kernel functions
-*/
-
-/**
- * Test Description
- * ------------------------
- *    - launch kernel with dynamic shared memory for float and double
- * datatypes and verify the results.
-
- * Test source
- * ------------------------
- *    - catch/unit/kernel/hipDynamicShared.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.5
- */
-
-TEST_CASE("Unit_hipDynamicShared") {
-  SECTION("test case with float for least size") {
-    testExternShared<float>(1024, 4);
-    testExternShared<float>(1024, 8);
-    testExternShared<float>(1024, 16);
-    testExternShared<float>(1024, 32);
-    testExternShared<float>(1024, 64);
-  }
-
-  SECTION("test case with float for max size") {
-    testExternShared<float>(65536, 4);
-    testExternShared<float>(65536, 8);
-    testExternShared<float>(65536, 16);
-    testExternShared<float>(65536, 32);
-    testExternShared<float>(65536, 64);
-  }
-
-  SECTION("test case with double for least size") {
-    testExternShared<double>(1024, 4);
-    testExternShared<double>(1024, 8);
-    testExternShared<double>(1024, 16);
-    testExternShared<double>(1024, 32);
-    testExternShared<double>(1024, 64);
-  }
-
-  SECTION("test case with double for max size") {
-    testExternShared<double>(65536, 4);
-    testExternShared<double>(65536, 8);
-    testExternShared<double>(65536, 16);
-    testExternShared<double>(65536, 32);
-    testExternShared<double>(65536, 64);
-  }
-
-  SECTION("test case with float for max LDS size") {
-    int maxLDS = 0;
-    HIP_CHECK(hipDeviceGetAttribute(&maxLDS,
-                                  hipDeviceAttributeMaxSharedMemoryPerBlock, 0));
-    testExternShared<float>(1024, maxLDS/sizeof(float));
-  }
-}
-
-/**
-* End doxygen group KernelTest.
-* @}
-*/
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+ 
+
+#pragma clang diagnostic ignored "-Wunused-parameter"
+
+unsigned threadsPerBlock = 256;
+
+template <unsigned batch, typename T>
+__device__ void sum(T* sdata, unsigned groupElements, unsigned tid) {
+  T tmp;
+  if (groupElements < batch)
+      return;
+  // sdata[tid] += sdata[tid - batch/2] does not work when block size is
+  // greater than wave size because one wave may complete before another
+  // wave.
+  if (tid >= batch/2 && tid < groupElements)
+      tmp = sdata[tid - batch/2];
+  __syncthreads();
+  if (tid >= batch/2 && tid < groupElements)
+      sdata[tid] += tmp;
+  __syncthreads();
+}
+
+template <typename T>
+__global__ void testExternSharedKernel(const T* A_d, const T* B_d, T* C_d,
+                                    size_t numElements, size_t groupElements) {
+  // declare dynamic shared memory
+  extern __shared__ double sdata0[];
+  T* sdata = reinterpret_cast<T *>(sdata0);
+
+  size_t gid = (blockIdx.x * blockDim.x + threadIdx.x);
+  size_t tid = threadIdx.x;
+
+  // initialize dynamic shared memory
+  if (tid < groupElements) {
+      sdata[tid] = static_cast<T>(tid);
+  }
+  __syncthreads();
+
+  // prefix sum inside dynamic shared memory
+  sum<512>(sdata, groupElements, tid);
+  sum<256>(sdata, groupElements, tid);
+  sum<128>(sdata, groupElements, tid);
+  sum<64>(sdata, groupElements, tid);
+  sum<32>(sdata, groupElements, tid);
+  sum<16>(sdata, groupElements, tid);
+  sum<8>(sdata, groupElements, tid);
+  sum<4>(sdata, groupElements, tid);
+  sum<2>(sdata, groupElements, tid);
+  C_d[gid] = A_d[gid] + B_d[gid] + sdata[tid % groupElements];
+}
+
+template <typename T>
+void testExternShared(size_t N, unsigned groupElements) {
+  size_t Nbytes = N * sizeof(T);
+
+  T *A_d, *B_d, *C_d;
+  T *A_h, *B_h, *C_h;
+
+  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
+  unsigned blocks = N/threadsPerBlock;
+  assert(N == blocks * threadsPerBlock);
+
+  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
+  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
+
+  // calculate the amount of dynamic shared memory required
+  size_t groupMemBytes = groupElements * sizeof(T);
+
+  // launch kernel with dynamic shared memory
+  hipLaunchKernelGGL(HIP_KERNEL_NAME(testExternSharedKernel<T>), dim3(blocks),
+                     dim3(threadsPerBlock), groupMemBytes, 0, A_d, B_d, C_d,
+                     N, groupElements);
+
+  HIP_CHECK(hipDeviceSynchronize());
+  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
+
+  // verify
+  for (size_t i = 0; i < N; ++i) {
+    size_t tid = (i % min(threadsPerBlock, groupElements));
+    T sumFromSharedMemory = static_cast<T>(tid * (tid + 1) / 2);
+    T expected = A_h[i] + B_h[i] + sumFromSharedMemory;
+    REQUIRE(C_h[i] == expected);
+  }
+  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
+}
+
+/**
+* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
+* @{
+* @ingroup KernelTest
+* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
+   std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
+* Method to invocate kernel functions
+*/
+
+/**
+ * Test Description
+ * ------------------------
+ *    - launch kernel with dynamic shared memory for float and double
+ * datatypes and verify the results.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/kernel/hipDynamicShared.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.5
+ */
+
+TEST_CASE("Unit_hipDynamicShared") {
+  SECTION("test case with float for least size") {
+    testExternShared<float>(1024, 4);
+    testExternShared<float>(1024, 8);
+    testExternShared<float>(1024, 16);
+    testExternShared<float>(1024, 32);
+    testExternShared<float>(1024, 64);
+  }
+
+  SECTION("test case with float for max size") {
+    testExternShared<float>(65536, 4);
+    testExternShared<float>(65536, 8);
+    testExternShared<float>(65536, 16);
+    testExternShared<float>(65536, 32);
+    testExternShared<float>(65536, 64);
+  }
+
+  SECTION("test case with double for least size") {
+    testExternShared<double>(1024, 4);
+    testExternShared<double>(1024, 8);
+    testExternShared<double>(1024, 16);
+    testExternShared<double>(1024, 32);
+    testExternShared<double>(1024, 64);
+  }
+
+  SECTION("test case with double for max size") {
+    testExternShared<double>(65536, 4);
+    testExternShared<double>(65536, 8);
+    testExternShared<double>(65536, 16);
+    testExternShared<double>(65536, 32);
+    testExternShared<double>(65536, 64);
+  }
+
+  SECTION("test case with float for max LDS size") {
+    int maxLDS = 0;
+    HIP_CHECK(hipDeviceGetAttribute(&maxLDS,
+                                  hipDeviceAttributeMaxSharedMemoryPerBlock, 0));
+    testExternShared<float>(1024, maxLDS/sizeof(float));
+  }
+}
+
+/**
+* End doxygen group KernelTest.
+* @}
+*/
diff --git a/projects/hip-tests/catch/unit/kernel/hipDynamicShared2.cc b/projects/hip-tests/catch/unit/kernel/hipDynamicShared2.cc
index 248b7a0dbc..3fa8dad04a 100644
--- a/projects/hip-tests/catch/unit/kernel/hipDynamicShared2.cc
+++ b/projects/hip-tests/catch/unit/kernel/hipDynamicShared2.cc
@@ -1,94 +1,94 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
- 
-
-#define LEN  (16 * 1024)
-#define SIZE (LEN * sizeof(float))
-
-__global__ void vectorAdd(float* Ad, float* Bd) {
-  extern __shared__ float sBd[];
-  int tx = threadIdx.x;
-  for (int i = 0; i < LEN / 64; i++) {
-    sBd[tx + i * 64] = Ad[tx + i * 64] + 1.0f;
-    Bd[tx + i * 64] = sBd[tx + i * 64];
-  }
-}
-
-/**
-* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
-* @{
-* @ingroup KernelTest
-* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
-   std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
-* Method to invocate kernel functions
-*/
-
-/**
- * Test Description
- * ------------------------
- *    - Assign max dynamic shared memory to kernel function and 
- * verify the results.
-
- * Test source
- * ------------------------
- *    - catch/unit/kernel/hipDynamicShared2.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.5
- */
-
-TEST_CASE("Unit_hipDynamicShared2") {
-  float *A, *B, *Ad, *Bd;
-  A = new float[LEN];
-  B = new float[LEN];
-  for (int i = 0; i < LEN; i++) {
-      A[i] = 1.0f;
-      B[i] = 1.0f;
-  }
-  HIP_CHECK(hipMalloc(&Ad, SIZE));
-  HIP_CHECK(hipMalloc(&Bd, SIZE));
-  HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
-  HIP_CHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));
-
-  hipError_t ret = hipFuncSetAttribute(
-      reinterpret_cast<const void*>(&vectorAdd),
-      hipFuncAttributeMaxDynamicSharedMemorySize, SIZE);
-
-  REQUIRE(ret == hipSuccess);
-  hipLaunchKernelGGL(vectorAdd, dim3(1, 1, 1), dim3(64, 1, 1), SIZE, 0, Ad, Bd);
-  HIP_CHECK(hipGetLastError());
-  HIP_CHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost));
-  for (int i = 0; i < LEN; i++) {
-    assert(B[i] > 1.0f && B[i] < 3.0f);
-  }
-  HIP_CHECK(hipFree(Ad));
-  HIP_CHECK(hipFree(Bd));
-
-  delete[] A;
-  delete[] B;
-}
-
-/**
-* End doxygen group KernelTest.
-* @}
-*/
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+ 
+
+#define LEN  (16 * 1024)
+#define SIZE (LEN * sizeof(float))
+
+__global__ void vectorAdd(float* Ad, float* Bd) {
+  extern __shared__ float sBd[];
+  int tx = threadIdx.x;
+  for (int i = 0; i < LEN / 64; i++) {
+    sBd[tx + i * 64] = Ad[tx + i * 64] + 1.0f;
+    Bd[tx + i * 64] = sBd[tx + i * 64];
+  }
+}
+
+/**
+* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
+* @{
+* @ingroup KernelTest
+* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
+   std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
+* Method to invocate kernel functions
+*/
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Assign max dynamic shared memory to kernel function and 
+ * verify the results.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/kernel/hipDynamicShared2.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.5
+ */
+
+TEST_CASE("Unit_hipDynamicShared2") {
+  float *A, *B, *Ad, *Bd;
+  A = new float[LEN];
+  B = new float[LEN];
+  for (int i = 0; i < LEN; i++) {
+      A[i] = 1.0f;
+      B[i] = 1.0f;
+  }
+  HIP_CHECK(hipMalloc(&Ad, SIZE));
+  HIP_CHECK(hipMalloc(&Bd, SIZE));
+  HIP_CHECK(hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice));
+  HIP_CHECK(hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice));
+
+  hipError_t ret = hipFuncSetAttribute(
+      reinterpret_cast<const void*>(&vectorAdd),
+      hipFuncAttributeMaxDynamicSharedMemorySize, SIZE);
+
+  REQUIRE(ret == hipSuccess);
+  hipLaunchKernelGGL(vectorAdd, dim3(1, 1, 1), dim3(64, 1, 1), SIZE, 0, Ad, Bd);
+  HIP_CHECK(hipGetLastError());
+  HIP_CHECK(hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost));
+  for (int i = 0; i < LEN; i++) {
+    assert(B[i] > 1.0f && B[i] < 3.0f);
+  }
+  HIP_CHECK(hipFree(Ad));
+  HIP_CHECK(hipFree(Bd));
+
+  delete[] A;
+  delete[] B;
+}
+
+/**
+* End doxygen group KernelTest.
+* @}
+*/
diff --git a/projects/hip-tests/catch/unit/kernel/hipEmptyKernel.cc b/projects/hip-tests/catch/unit/kernel/hipEmptyKernel.cc
index 478f39550a..d26a94b197 100644
--- a/projects/hip-tests/catch/unit/kernel/hipEmptyKernel.cc
+++ b/projects/hip-tests/catch/unit/kernel/hipEmptyKernel.cc
@@ -1,59 +1,59 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
- 
-
-#pragma clang diagnostic ignored "-Wunused-parameter"
-
-__global__ void Empty(int param) {}
-
-/**
-* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
-* @{
-* @ingroup KernelTest
-* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
-   std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
-* Method to invocate kernel functions
-*/
-
-/**
- * Test Description
- * ------------------------
- *    - pass empty Kernel function.
-
- * Test source
- * ------------------------
- *    - catch/unit/kernel/hipEmptyKernel.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.5
- */
-
-TEST_CASE("Unit_hipEmptyKernel") {
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(Empty), dim3(1), dim3(1), 0, 0, 0);
-    HIP_CHECK(hipDeviceSynchronize());
-}
-
-/**
-* End doxygen group KernelTest.
-* @}
-*/
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+ 
+
+#pragma clang diagnostic ignored "-Wunused-parameter"
+
+__global__ void Empty(int param) {}
+
+/**
+* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
+* @{
+* @ingroup KernelTest
+* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
+   std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
+* Method to invocate kernel functions
+*/
+
+/**
+ * Test Description
+ * ------------------------
+ *    - pass empty Kernel function.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/kernel/hipEmptyKernel.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.5
+ */
+
+TEST_CASE("Unit_hipEmptyKernel") {
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(Empty), dim3(1), dim3(1), 0, 0, 0);
+    HIP_CHECK(hipDeviceSynchronize());
+}
+
+/**
+* End doxygen group KernelTest.
+* @}
+*/
diff --git a/projects/hip-tests/catch/unit/kernel/hipExtLaunchKernelGGL.cc b/projects/hip-tests/catch/unit/kernel/hipExtLaunchKernelGGL.cc
index 962b58e605..9bdf7ebc76 100644
--- a/projects/hip-tests/catch/unit/kernel/hipExtLaunchKernelGGL.cc
+++ b/projects/hip-tests/catch/unit/kernel/hipExtLaunchKernelGGL.cc
@@ -1,138 +1,138 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-// Test the Grid_Launch syntax.
-
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
- 
-#include "hip/hip_ext.h"
-
-static unsigned threadsPerBlock = 256;
-static unsigned blocksPerCU = 6;
-
-struct _t {
-    double _a, _b, _c, _d, _e, _f, _g, _h, _i, _j;
-};
-
-typedef struct _t _T;
-
-__global__ void sKernel(_T s, double *a) {
-    *a = s._a + s._b + s._c + s._d + s._e + s._f + s._g + s._h + s._i + s._j;
-}
-
-__global__ void mKernel(char f, int16_t a, int b, double c,
-                        int16_t d, int e, double* res) {
-    *res = a + b + c + d + e + f;
-}
-
-void testMixData() {
-  double m = 0;
-  double *d_m;
-  HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
-  int a = 1, e = 10;
-  int16_t b = 2, d = 4;
-  double c = 3.0;
-  char ff = 10;
-  hipExtLaunchKernelGGL(mKernel, 1, 1, 0, 0, nullptr, nullptr, 0, ff,
-                         b, a, c, d, e, d_m);
-  HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
-  REQUIRE(m == 30.0);
-  HIP_CHECK(hipFree(d_m));
-}
-
-void testStruct() {
-  double m = 0;
-  double *d_m;
-  HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
-  _T s{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
-  hipExtLaunchKernelGGL(sKernel, 1, 1, 0, 0, nullptr, nullptr, 0, s, d_m);
-  HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
-  REQUIRE(m == 55.0);
-  HIP_CHECK(hipFree(d_m));
-}
-
-void test(size_t N) {
-  size_t Nbytes = N * sizeof(int);
-  int *A_d, *B_d, *C_d;
-  int *A_h, *B_h, *C_h;
-
-  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
-
-  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
-
-  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
-  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
-
-  hipExtLaunchKernelGGL(HipTest::vectorADD, dim3(blocks),
-                        dim3(threadsPerBlock), 0, 0, nullptr, nullptr, 0,
-                        static_cast<const int*>(A_d),
-                        static_cast<const int*>(B_d), C_d, N);
-
-  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
-  HIP_CHECK(hipDeviceSynchronize());
-  HipTest::checkVectorADD(A_h, B_h, C_h, N);
-}
-
-/**
-* @addtogroup hipExtLaunchKernelGGL hipExtLaunchKernelGGL
-* @{
-* @ingroup KernelTest
-* `void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
-                              std::uint32_t sharedMemBytes, hipStream_t stream,
-                              hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags,
-                              Args... args)` -
-* Launches kernel with dimention parameters and shared memory on stream with templated kernel and arguments
-*/
-
-/**
- * Test Description
- * ------------------------
- *    - Test case to verify sample array with hipExtLaunchKernelGGL()
- * and verify the results.
- *    - Test case to verify struct data with hipExtLaunchKernelGGL()
- * and verify the results.
- *    - Test case to verify mix datatypes with hipExtLaunchKernelGGL()
- * and verify the results.
-
- * Test source
- * ------------------------
- *    - catch/unit/kernel/hipExtLaunchKernelGGL.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.5
- */
-
-TEST_CASE("Unit_hipExtLaunchKernelGGL") {
-  SECTION("test run") {
-    size_t N = 4 * 1024 * 1024;
-    test(N);
-  }
-  SECTION("testStruct run") {
-    testStruct();
-  }
-  SECTION("testMixData run") {
-    testMixData();
-  }
-}
-
-/**
-* End doxygen group KernelTest.
-* @}
-*/
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+// Test the Grid_Launch syntax.
+
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+ 
+#include "hip/hip_ext.h"
+
+static unsigned threadsPerBlock = 256;
+static unsigned blocksPerCU = 6;
+
+struct _t {
+    double _a, _b, _c, _d, _e, _f, _g, _h, _i, _j;
+};
+
+typedef struct _t _T;
+
+__global__ void sKernel(_T s, double *a) {
+    *a = s._a + s._b + s._c + s._d + s._e + s._f + s._g + s._h + s._i + s._j;
+}
+
+__global__ void mKernel(char f, int16_t a, int b, double c,
+                        int16_t d, int e, double* res) {
+    *res = a + b + c + d + e + f;
+}
+
+void testMixData() {
+  double m = 0;
+  double *d_m;
+  HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
+  int a = 1, e = 10;
+  int16_t b = 2, d = 4;
+  double c = 3.0;
+  char ff = 10;
+  hipExtLaunchKernelGGL(mKernel, 1, 1, 0, 0, nullptr, nullptr, 0, ff,
+                         b, a, c, d, e, d_m);
+  HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
+  REQUIRE(m == 30.0);
+  HIP_CHECK(hipFree(d_m));
+}
+
+void testStruct() {
+  double m = 0;
+  double *d_m;
+  HIP_CHECK(hipMalloc(&d_m, sizeof(double)));
+  _T s{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+  hipExtLaunchKernelGGL(sKernel, 1, 1, 0, 0, nullptr, nullptr, 0, s, d_m);
+  HIP_CHECK(hipMemcpy(&m, d_m, sizeof(double), hipMemcpyDeviceToHost));
+  REQUIRE(m == 55.0);
+  HIP_CHECK(hipFree(d_m));
+}
+
+void test(size_t N) {
+  size_t Nbytes = N * sizeof(int);
+  int *A_d, *B_d, *C_d;
+  int *A_h, *B_h, *C_h;
+
+  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
+
+  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
+
+  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
+  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
+
+  hipExtLaunchKernelGGL(HipTest::vectorADD, dim3(blocks),
+                        dim3(threadsPerBlock), 0, 0, nullptr, nullptr, 0,
+                        static_cast<const int*>(A_d),
+                        static_cast<const int*>(B_d), C_d, N);
+
+  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
+  HIP_CHECK(hipDeviceSynchronize());
+  HipTest::checkVectorADD(A_h, B_h, C_h, N);
+}
+
+/**
+* @addtogroup hipExtLaunchKernelGGL hipExtLaunchKernelGGL
+* @{
+* @ingroup KernelTest
+* `void hipExtLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
+                              std::uint32_t sharedMemBytes, hipStream_t stream,
+                              hipEvent_t startEvent, hipEvent_t stopEvent, std::uint32_t flags,
+                              Args... args)` -
+* Launches kernel with dimention parameters and shared memory on stream with templated kernel and arguments
+*/
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Test case to verify sample array with hipExtLaunchKernelGGL()
+ * and verify the results.
+ *    - Test case to verify struct data with hipExtLaunchKernelGGL()
+ * and verify the results.
+ *    - Test case to verify mix datatypes with hipExtLaunchKernelGGL()
+ * and verify the results.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/kernel/hipExtLaunchKernelGGL.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.5
+ */
+
+TEST_CASE("Unit_hipExtLaunchKernelGGL") {
+  SECTION("test run") {
+    size_t N = 4 * 1024 * 1024;
+    test(N);
+  }
+  SECTION("testStruct run") {
+    testStruct();
+  }
+  SECTION("testMixData run") {
+    testMixData();
+  }
+}
+
+/**
+* End doxygen group KernelTest.
+* @}
+*/
diff --git a/projects/hip-tests/catch/unit/kernel/hipGridLaunch.cc b/projects/hip-tests/catch/unit/kernel/hipGridLaunch.cc
index 29667aa60b..91ba18a4a1 100644
--- a/projects/hip-tests/catch/unit/kernel/hipGridLaunch.cc
+++ b/projects/hip-tests/catch/unit/kernel/hipGridLaunch.cc
@@ -1,122 +1,122 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-// Test the Grid_Launch syntax.
-
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
- 
-
-static unsigned threadsPerBlock = 256;
-static unsigned blocksPerCU = 6;
-
-// __device__ maps to __attribute__((hc))
-__device__ int foo(int i) { return i + 1; }
-
-
-template <typename T>
-__global__ void vectorADD2(T* A_d, T* B_d, T* C_d, size_t N) {
-    size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-    size_t stride = blockDim.x * gridDim.x;
-
-    for (size_t i = offset; i < N; i += stride) {
-        double foo = __hiloint2double(A_d[i], B_d[i]);
-        C_d[i] = __double2loint(foo) + __double2hiint(foo);
-    }
-}
-
-int test_gl2(size_t N) {
-  size_t Nbytes = N * sizeof(int);
-  int *A_d, *B_d, *C_d;
-  int *A_h, *B_h, *C_h;
-  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
-
-  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
-
-  // Full vadd in one large chunk, to get things started:
-  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
-  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
-  hipLaunchKernelGGL(vectorADD2, dim3(blocks), dim3(threadsPerBlock),
-                      0, 0, A_d, B_d, C_d, N);
-  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
-  HIP_CHECK(hipDeviceSynchronize());
-  // verify
-  HipTest::checkVectorADD(A_h, B_h, C_h, N);
-  return 0;
-}
-
-#if __HIP__
-int test_triple_chevron(size_t N) {
-  size_t Nbytes = N * sizeof(int);
-  int *A_d, *B_d, *C_d;
-  int *A_h, *B_h, *C_h;
-  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
-
-  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
-  // Full vadd in one large chunk, to get things started:
-  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
-  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
-  vectorADD2<<<dim3(blocks), dim3(threadsPerBlock)>>>(A_d, B_d, C_d, N);
-  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
-  HIP_CHECK(hipDeviceSynchronize());
-  // verify
-  HipTest::checkVectorADD(A_h, B_h, C_h, N);
-  return 0;
-}
-#endif
-
-/**
-* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
-* @{
-* @ingroup KernelTest
-* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
-   std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
-* Method to invocate kernel functions
-*/
-
-/**
- * Test Description
- * ------------------------
- *    - Test case to verify the Grid_Launch syntax.
-
- * Test source
- * ------------------------
- *    - catch/unit/kernel/hipGridLaunch.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.5
- */
-
-TEST_CASE("Unit_hipGridLaunch") {
-  size_t N = 4 * 1024 * 1024;
-  SECTION("Test test_gl2") {
-    test_gl2(N);
-  }
-
-#if __HIP__
-  SECTION("Test triple_chevron") {
-    test_triple_chevron(N);
-  }
-#endif
-}
-
-/**
-* End doxygen group KernelTest.
-* @}
-*/
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+// Test the Grid_Launch syntax.
+
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+ 
+
+static unsigned threadsPerBlock = 256;
+static unsigned blocksPerCU = 6;
+
+// __device__ maps to __attribute__((hc))
+__device__ int foo(int i) { return i + 1; }
+
+
+template <typename T>
+__global__ void vectorADD2(T* A_d, T* B_d, T* C_d, size_t N) {
+    size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
+    size_t stride = blockDim.x * gridDim.x;
+
+    for (size_t i = offset; i < N; i += stride) {
+        double foo = __hiloint2double(A_d[i], B_d[i]);
+        C_d[i] = __double2loint(foo) + __double2hiint(foo);
+    }
+}
+
+int test_gl2(size_t N) {
+  size_t Nbytes = N * sizeof(int);
+  int *A_d, *B_d, *C_d;
+  int *A_h, *B_h, *C_h;
+  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
+
+  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
+
+  // Full vadd in one large chunk, to get things started:
+  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
+  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
+  hipLaunchKernelGGL(vectorADD2, dim3(blocks), dim3(threadsPerBlock),
+                      0, 0, A_d, B_d, C_d, N);
+  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
+  HIP_CHECK(hipDeviceSynchronize());
+  // verify
+  HipTest::checkVectorADD(A_h, B_h, C_h, N);
+  return 0;
+}
+
+#if __HIP__
+int test_triple_chevron(size_t N) {
+  size_t Nbytes = N * sizeof(int);
+  int *A_d, *B_d, *C_d;
+  int *A_h, *B_h, *C_h;
+  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
+
+  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
+  // Full vadd in one large chunk, to get things started:
+  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
+  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
+  vectorADD2<<<dim3(blocks), dim3(threadsPerBlock)>>>(A_d, B_d, C_d, N);
+  HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
+  HIP_CHECK(hipDeviceSynchronize());
+  // verify
+  HipTest::checkVectorADD(A_h, B_h, C_h, N);
+  return 0;
+}
+#endif
+
+/**
+* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
+* @{
+* @ingroup KernelTest
+* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
+   std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
+* Method to invocate kernel functions
+*/
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Test case to verify the Grid_Launch syntax.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/kernel/hipGridLaunch.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.5
+ */
+
+TEST_CASE("Unit_hipGridLaunch") {
+  size_t N = 4 * 1024 * 1024;
+  SECTION("Test test_gl2") {
+    test_gl2(N);
+  }
+
+#if __HIP__
+  SECTION("Test triple_chevron") {
+    test_triple_chevron(N);
+  }
+#endif
+}
+
+/**
+* End doxygen group KernelTest.
+* @}
+*/
diff --git a/projects/hip-tests/catch/unit/kernel/hipLanguageExtensions.cc b/projects/hip-tests/catch/unit/kernel/hipLanguageExtensions.cc
index 0308c087d1..2ef83a8713 100644
--- a/projects/hip-tests/catch/unit/kernel/hipLanguageExtensions.cc
+++ b/projects/hip-tests/catch/unit/kernel/hipLanguageExtensions.cc
@@ -1,111 +1,111 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_common.hh>
-#include <hip_test_checkers.hh>
- 
-#include <hip/math_functions.h>
-
-#pragma clang diagnostic ignored "-Wunused-variable"
-#pragma clang diagnostic ignored "-Wuninitialized"
-
-// Simple tests for variable type qualifiers:
-__device__ int deviceVar;
-
-// TODO-HCC __constant__ not working yet.
-__constant__ int constantVar1;
-
-__constant__ __device__ int constantVar2;
-
-// Test HOST space:
-__host__ void foo() { printf("foo!\n"); }
-
-__device__ __noinline__ int sum1_noinline(int a) { return a + 1; }
-__device__ __forceinline__ int sum1_forceinline(int a) { return a + 1; }
-
-
-__device__ __host__ float PlusOne(float x) { return x + 1.0; }
-
-__global__ void MyKernel(const float* a, const float* b, float* c,
-                         unsigned N) {
-  unsigned gid = threadIdx.x;
-  if (gid < N) {
-      c[gid] = a[gid] + PlusOne(b[gid]);
-  }
-}
-
-void callMyKernel() {
-  float *a, *b, *c;
-  const unsigned blockSize = 256;
-  unsigned N = blockSize;
-  hipLaunchKernelGGL(MyKernel, dim3(N / blockSize), dim3(blockSize),
-                     0, 0, a, b, c, N);
-}
-
-template <typename T>
-__global__ void vectorADD(T __restrict__* A_d, T* B_d, T* C_d, size_t N) {
-#ifdef NOT_YET
-  int a = __shfl_up(x, 1);
-#endif
-  float x = 1.0;
-#ifdef NOT_YET
-    float fastZ = __sin(x);
-#endif
-  __syncthreads();
-
-  size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-  size_t stride = blockDim.x * gridDim.x;
-
-  for (size_t i = offset; i < N; i += stride) {
-      C_d[i] = A_d[i] + B_d[i];
-  }
-}
-
-/**
-* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
-* @{
-* @ingroup KernelTest
-* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
-   std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
-* Method to invocate kernel functions
-*/
-
-/**
- * Test Description
- * ------------------------
- *    - Collection of code to make sure that various features
- * in the hip kernel language compile.
-
- * Test source
- * ------------------------
- *    - catch/unit/kernel/hipLanguageExtensions.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.5
- */
-
-TEST_CASE("Unit_hipLanguageExtensions") {
-  REQUIRE(true);
-}
-
-/**
-* End doxygen group KernelTest.
-* @}
-*/
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_common.hh>
+#include <hip_test_checkers.hh>
+ 
+#include <hip/math_functions.h>
+
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wuninitialized"
+
+// Simple tests for variable type qualifiers:
+__device__ int deviceVar;
+
+// TODO-HCC __constant__ not working yet.
+__constant__ int constantVar1;
+
+__constant__ __device__ int constantVar2;
+
+// Test HOST space:
+__host__ void foo() { printf("foo!\n"); }
+
+__device__ __noinline__ int sum1_noinline(int a) { return a + 1; }
+__device__ __forceinline__ int sum1_forceinline(int a) { return a + 1; }
+
+
+__device__ __host__ float PlusOne(float x) { return x + 1.0; }
+
+__global__ void MyKernel(const float* a, const float* b, float* c,
+                         unsigned N) {
+  unsigned gid = threadIdx.x;
+  if (gid < N) {
+      c[gid] = a[gid] + PlusOne(b[gid]);
+  }
+}
+
+void callMyKernel() {
+  float *a, *b, *c;
+  const unsigned blockSize = 256;
+  unsigned N = blockSize;
+  hipLaunchKernelGGL(MyKernel, dim3(N / blockSize), dim3(blockSize),
+                     0, 0, a, b, c, N);
+}
+
+template <typename T>
+__global__ void vectorADD(T __restrict__* A_d, T* B_d, T* C_d, size_t N) {
+#ifdef NOT_YET
+  int a = __shfl_up(x, 1);
+#endif
+  float x = 1.0;
+#ifdef NOT_YET
+    float fastZ = __sin(x);
+#endif
+  __syncthreads();
+
+  size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
+  size_t stride = blockDim.x * gridDim.x;
+
+  for (size_t i = offset; i < N; i += stride) {
+      C_d[i] = A_d[i] + B_d[i];
+  }
+}
+
+/**
+* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
+* @{
+* @ingroup KernelTest
+* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
+   std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
+* Method to invocate kernel functions
+*/
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Collection of code to make sure that various features
+ * in the hip kernel language compile.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/kernel/hipLanguageExtensions.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.5
+ */
+
+TEST_CASE("Unit_hipLanguageExtensions") {
+  REQUIRE(true);
+}
+
+/**
+* End doxygen group KernelTest.
+* @}
+*/
diff --git a/projects/hip-tests/catch/unit/kernel/hipLaunchParm.cc b/projects/hip-tests/catch/unit/kernel/hipLaunchParm.cc
index 86d574778c..016454eeea 100644
--- a/projects/hip-tests/catch/unit/kernel/hipLaunchParm.cc
+++ b/projects/hip-tests/catch/unit/kernel/hipLaunchParm.cc
@@ -1,1019 +1,1019 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
- 
-#include <cstdint>
-
-#pragma clang diagnostic ignored "-Wunused-variable"
-#pragma clang diagnostic ignored "-Wunused-parameter"
-#pragma clang diagnostic ignored "-Wunused-result"
-#pragma clang diagnostic ignored "-Wuninitialized"
-
-// Memory alignment is broken
-// Update: with latest changes the aligment is working fine, hence enabled
-#define ENABLE_ALIGNMENT_TEST_SMALL_BAR 1
-
-// Packed member atribute broken
-#define ENABLE_PACKED_TEST 0
-
-// Update: with latest changes struct class object
-// from device is working fine, hence enabled
-#define ENABLE_CLASS_OBJ_ACCESS 1
-
-// accessing dynamic/heap memory from device is broken
-#define ENABLE_HEAP_MEMORY_ACCESS 0
-
-// Update: with latest changes it's working hence enabled
-#define ENABLE_USER_STL 1
-
-// Update: with latest changes it's working hence enabled
-#define ENABLE_OUT_OF_ORDER_INITIALIZATION 1
-
-// Direct initialization of struct broken,
-// ip_d9 is a pointer, uint_t*, hipLaunchKernelStruct_h9 = {'c', ip_d9};
-#define ENABLE_DECLARE_INITIALIZATION_POINTER 0
-
-// Bit fields are broken
-#define ENABLE_BIT_FIELDS 0
-
-static const int  BLOCK_DIM_SIZE = 512;
-
-// allocate memory on device and host for result validation
-static bool *result_d, *result_h;
-
-static hipError_t hipMallocError = hipErrorUnknown;
-static hipError_t hipHostMallocError = hipErrorUnknown;
-static hipError_t hipMemsetError = hipErrorUnknown;
-
-static void ResultValidation() {
-  HIP_CHECK(hipMemcpy(result_h, result_d, BLOCK_DIM_SIZE*sizeof(bool),
-            hipMemcpyDeviceToHost));
-
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
-    REQUIRE(result_h[k] == true);
-  }
-  return;
-}
-
-// Segregating the reset part as it was causing a problem when i put inside
-// ResultValidation() function, the memory was not reset correctly for the
-// tests which were disabled.
-static void ResetValidationMem() {
-  // reset the memory to false to reuse it.
-  HIP_CHECK(hipMemset(result_d, false, BLOCK_DIM_SIZE));
-  HIP_CHECK(hipMemset(result_h, false, BLOCK_DIM_SIZE));
-  return;
-}
-
-// This test is to verify Struct with variables
-// support, read from device.
-typedef struct hipLaunchKernelStruct1 {
-  int li;  // local int
-  float lf;  // local float
-  bool result;  // local bool
-} hipLaunchKernelStruct_t1;
-
-// This test is to verify struct with padding, read from device
-typedef struct hipLaunchKernelStruct2 {
-  char c1;
-  int64_t l1;
-  char c2;
-  int64_t l2;
-  bool result;
-} hipLaunchKernelStruct_t2;
-
-// This test is to verify struct with padding, read from device
-typedef struct hipLaunchKernelStruct3 {
-  char bf1;
-  char bf2;
-  int64_t l1;
-  char bf3;
-  bool result;
-} hipLaunchKernelStruct_t3;
-
-// This test is to verify empty struct
-typedef struct hipLaunchKernelStruct4 {
-  // empty struct, size will be verified from device side,size 1Byte
-} hipLaunchKernelStruct_t4;
-
-// This test is to verify struct with pointer member variable.
-typedef struct hipLaunchKernelStruct5 {
-  char c1;
-  char* cp;  // char pointer
-} hipLaunchKernelStruct_t5;
-
-
-// This test is to verify struct with aligned(8),
-// right now it's broken on hcc & hip-clang
-typedef struct hipLaunchKernelStruct6 {
-  char c1;
-  int16_t si;
-} __attribute__((aligned(8)))  hipLaunchKernelStruct_t6;
-
-// This test is to verify struct with aligned(16),
-// right now it's brokenon hcc & hip-clang
-typedef struct hipLaunchKernelStruct7 {
-  char c1;
-  int16_t si;
-} __attribute__((aligned(16)))  hipLaunchKernelStruct_t7;
-
-// This test is to verify struct with packed & aligned,
-// size should be 4Bytes right now it's broken on hcc & hip-clang
-typedef struct hipLaunchKernelStruct8 {
-  char c1;
-  int16_t si;
-  bool b;
-}__attribute__((packed, aligned(4)))  hipLaunchKernelStruct_t8;
-
-// This test is to verify struct with packed, no alignment as Sam suggested
-// size should be 4Bytes, right now it's broken on hcc & hip-clang
-typedef struct hipLaunchKernelStruct8A {
-  char c1;
-  int16_t si;
-  bool b;
-}__attribute__((packed))  hipLaunchKernelStruct_t8A;
-
-// This test is to verify struct with alignment, no packing as Sam suggested
-// size should be 8Bytes as no packing, right now it's broken on hcc & hip-clang
-typedef struct hipLaunchKernelStruct8B {
-  char c1;
-  int16_t si;
-  bool b;
-}__attribute__((aligned(8)))  hipLaunchKernelStruct_t8B;
-
-// This test is to verify const struct object
-typedef struct hipLaunchKernelStruct9 {
-  char c1;
-  uint32_t* ip;  // uint pointer
-} hipLaunchKernelStruct_t9;
-
-// This test is to verify struct with stdint types, uintN_t
-typedef struct hipLaunchKernelStruct10 {
-  uint64_t u64;
-  uint32_t u32;
-  uint8_t u8;
-} hipLaunchKernelStruct_t10;
-
-// This test is to verify struct with volatile member
-typedef struct hipLaunchKernelStruct11 {
-  int i1;
-  volatile unsigned int vint;
-} hipLaunchKernelStruct_t11;
-
-// This test is to verify struct with simple class object
-class base {
- public:
-    int i = 0;
-    base() {}
-};
-typedef struct hipLaunchKernelStruct12 {
-  base b;
-  char c1;
-} hipLaunchKernelStruct_t12;
-
-// This test is to verify struct with __device__ func() attribute
-typedef struct hipLaunchKernelStruct13 {
-  int i1;
-  __device__ int getvalue() { return i1; }
-} hipLaunchKernelStruct_t13;
-
-// This test is to verify struct with array variable,
-// write to from device
-typedef struct hipLaunchKernelStruct14 {
-  int readint;
-  int writeint[BLOCK_DIM_SIZE];  // will write to this from device
-} hipLaunchKernelStruct_t14;
-
-// This test is to verify struct with dynamic memory, new int
-// the heap memory will be accessed from device
-typedef struct hipLaunchKernelStruct15 {
-  char c1;
-  int* heapmem;  // allocated using hipMalloc()
-} hipLaunchKernelStruct_t15;
-
-// This test is to verify simple template struct
-template<typename T>
-struct hipLaunchKernelStruct_t16 {
-  T t1;
-};
-
-// This test is to verify simple explicity template struct
-template<typename T> struct hipLaunchKernelStruct_t17 {};
-template<>  // explicit template
-struct hipLaunchKernelStruct_t17<int> {
-  int t1;
-};
-
-// This test is to verity write to struct memory using __device__ func()
-typedef struct hipLaunchKernelStruct18 {
-  char c1;
-  __device__ void setChar(char c) { c1 = c; }
-  __device__ int getChar() { return c1; }
-} hipLaunchKernelStruct_t18;
-
-// This test is to verity user defined STL, simple stack implementation
-typedef struct stackNode {
-    int data;
-    stackNode* nextNode = NULL;
-} stackNode_t;
-typedef struct hipLaunchKernelStruct19 {
-  stackNode_t* stack = NULL;
-  unsigned int size_ = 0;
-  void pushMe(int value) {  // not a device function, setting from host
-    stackNode_t* newNode;
-    HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&newNode),
-                         sizeof(stackNode_t)));
-    HIP_CHECK(hipMemset(&newNode->data, value, sizeof(stackNode_t)));
-    // newNode->data = value;
-    ++size_;
-    if (stack == NULL) {
-      stack = newNode;
-      return;
-    }
-    stackNode_t* currentHead = stack;
-    stack = newNode;
-    stack->nextNode = currentHead;
-    return;
-  }
-  __device__ void popMe() {
-    stackNode_t* currentHead = stack;
-    stack = stack->nextNode;
-    --size_;
-    // delete currentHead;  // no idea why delete not working
-    return;
-  }
-  int stackSize() {
-    return size_;
-  }
-} hipLaunchKernelStruct_t19;
-
-// This test is to verify out of order initalizer of struct elements
-// and access in-order, from device.
-typedef struct hipLaunchKernelStruct20 {
-  char name;
-  int age;
-  int rank;
-} hipLaunchKernelStruct_t20;
-
-// This test is to verify bit fields operations
-// the size should be 1Bytes
-typedef struct hipLaunchKernelStruct21 {
-  int i : 3;  // limiting bits to 3
-  int j : 2;  // limiting bits to 2
-} hipLaunchKernelStruct_t21;
-
-// Passing struct to a hipLaunchKernelGGL(),
-// read and write into the same struct
-__global__ void hipLaunchKernelStructFunc1(
-                    hipLaunchKernelStruct_t1 hipLaunchKernelStruct_,
-                    bool* result_d1) {
-    int x = blockIdx.x * blockDim.x + threadIdx.x;
-
-    // set the result to true if the condition met
-    result_d1[x] =  ((hipLaunchKernelStruct_.li == 1)
-                      && (hipLaunchKernelStruct_.lf == 1.0)
-                      && (hipLaunchKernelStruct_.result == false));
-}
-
-// Passing struct to a hipLaunchKernelGGL(), checks padding,
-// read and write into the same struct
-__global__ void hipLaunchKernelStructFunc2(
-                    hipLaunchKernelStruct_t2 hipLaunchKernelStruct_,
-                    bool* result_d2) {
-    int x = blockIdx.x * blockDim.x + threadIdx.x;
-
-    // set the result to true if the condition met
-    result_d2[x] =  ((hipLaunchKernelStruct_.c1 == 'a')
-                      && (hipLaunchKernelStruct_.l1 == 1.0)
-                      && (hipLaunchKernelStruct_.c2 == 'b')
-                      && (hipLaunchKernelStruct_.l2 == 2.0) );
-}
-
-// Passing struct to a hipLaunchKernelGGL(), checks padding,
-// read and write into the same struct
-__global__ void hipLaunchKernelStructFunc3(
-                    hipLaunchKernelStruct_t3 hipLaunchKernelStruct_,
-                    bool* result_d3) {
-    int x = blockIdx.x * blockDim.x + threadIdx.x;
-
-    // set the result to true if the condition met
-    result_d3[x] =  ((hipLaunchKernelStruct_.bf1 == 1)
-                     && (hipLaunchKernelStruct_.bf2 == 1)
-                     && (hipLaunchKernelStruct_.l1 == 1.0)
-                     && (hipLaunchKernelStruct_.bf3 == 1) );
-}
-
-// Passing empty struct to a hipLaunchKernelGGL(),
-// check the size of 1Byte, set  result_d4 to true if condition met
-__global__ void hipLaunchKernelStructFunc4(
-                    hipLaunchKernelStruct_t4 hipLaunchKernelStruct_,
-                    bool* result_d4) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-
-  // set the result to true if the condition met
-  result_d4[x] =  (sizeof(hipLaunchKernelStruct_) == 1);
-}
-
-// Passing struct with pointer object to a hipLaunchKernelGGL()
-__global__ void hipLaunchKernelStructFunc5(
-                    hipLaunchKernelStruct_t5 hipLaunchKernelStruct_,
-                    bool* result_d5) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-
-  // set the result to true if the condition met
-  result_d5[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
-                    && (*hipLaunchKernelStruct_.cp == 'p'));
-}
-
-// Passing struct which is aligned to 8Byte to a hipLaunchKernelGGL(),
-// set the result_d6 to true if condition met
-__global__ void hipLaunchKernelStructFunc6(
-                    hipLaunchKernelStruct_t6 hipLaunchKernelStruct_,
-                    bool* result_d6) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-
-  // set the result to true if the condition met
-  // get the address of the struct
-  // size_t(p)%8 will be 0 if aligned to 8Byte address space
-  int *p = reinterpret_cast<int*>(&hipLaunchKernelStruct_);
-  result_d6[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
-                    && (hipLaunchKernelStruct_.si == 1)
-                    && ((size_t(p))%8 ==0));
-}
-
-// Passing struct which is aligned to 16Byte,
-// set the result_d7 to true if condition met
-__global__ void hipLaunchKernelStructFunc7(
-                    hipLaunchKernelStruct_t7 hipLaunchKernelStruct_,
-                    bool* result_d7) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-
-  // set the result to true if the condition met
-  // get the address of the struct
-  // size_t(p)%16 will be 0 if aligned to 16Byte address space
-  int *p = reinterpret_cast<int*>(&hipLaunchKernelStruct_);
-  result_d7[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
-                    && (hipLaunchKernelStruct_.si == 1)
-                    && ((size_t(p))%16 ==0) );
-}
-
-// Passing struct which is packed & aligned to 4Byte,
-// set the result_d8 to true if condition met
-__global__ void hipLaunchKernelStructFunc8(
-                    hipLaunchKernelStruct_t8 hipLaunchKernelStruct_,
-                    bool* result_d8) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  // set the result to true if the condition met
-  // get the address of the xth element, struct[x],
-  // size_t(p)%4 will be 0 if aligned to 4Byte address space
-  int *p = reinterpret_cast<int*>(&hipLaunchKernelStruct_);
-  result_d8[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
-                    && (hipLaunchKernelStruct_.si == 1)
-                    && ((size_t(p))%4 ==0)
-                    && (sizeof(hipLaunchKernelStruct_) == 4));
-}
-
-// Passing struct which is packed only, as Sam suggested, should be 4Bytes
-// set the result_d8A to true if condition met
-__global__ void hipLaunchKernelStructFunc8A(
-                    hipLaunchKernelStruct_t8A hipLaunchKernelStruct_,
-                    bool* result_d8A) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  // set the result to true if the condition met
-  // this is packed struct
-  // the address will not be aglined in this case hence condition removed
-  // only sizeof(hipLaunchKernelStruct_) will be valided
-  result_d8A[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
-                    && (hipLaunchKernelStruct_.si == 1)
-                    && (sizeof(hipLaunchKernelStruct_) == 4));
-}
-
-// Passing struct which is aligned(4) only, as Sam suggested
-// , size should be 8Bytes, set the result_d8B to true if condition met
-__global__ void hipLaunchKernelStructFunc8B(
-                    hipLaunchKernelStruct_t8B hipLaunchKernelStruct_,
-                    bool* result_d8B) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  // set the result to true if the condition met
-  // get the address of the xth element, struct[x],
-  // size_t(p)%4 will be 0 if aligned to 4Byte address space
-  int *p = reinterpret_cast<int*>(&hipLaunchKernelStruct_);
-  result_d8B[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
-                    && (hipLaunchKernelStruct_.si == 1)
-                    && ((size_t(p))%8 == 0)
-                    && (sizeof(hipLaunchKernelStruct_) == 8));
-}
-
-// Passing struct with uint pointer object to a hipLaunchKernelGGL()
-__global__ void hipLaunchKernelStructFunc9(
-                    const hipLaunchKernelStruct_t9 hipLaunchKernelStruct_,
-                    bool* result_d9) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-
-  // set the result to true if the condition met
-  result_d9[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
-                    && (*hipLaunchKernelStruct_.ip == 1));
-}
-
-// Passing struct with stdint types object, uintN_t, to a hipLaunchKernelGGL()
-__global__ void hipLaunchKernelStructFunc10(
-                    hipLaunchKernelStruct_t10 hipLaunchKernelStruct_,
-                    bool* result_d10) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  // set the result to true if the condition met
-  result_d10[x] =  ((hipLaunchKernelStruct_.u64 == UINT64_MAX)
-                    && (hipLaunchKernelStruct_.u32 == 1)
-                    && (hipLaunchKernelStruct_.u8 == UINT8_MAX));
-}
-
-// Passing struct with volatile member, to a hipLaunchKernelGGL()
-__global__ void hipLaunchKernelStructFunc11(
-                    hipLaunchKernelStruct_t11 hipLaunchKernelStruct_,
-                    bool* result_d11) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  // set the result to true if the condition met
-  result_d11[x] =  ((hipLaunchKernelStruct_.i1 == 1)
-                    && (hipLaunchKernelStruct_.vint == 0));
-}
-
-// Passing struct with simple class obj, to a hipLaunchKernelGGL()
-__global__ void hipLaunchKernelStructFunc12(
-                    hipLaunchKernelStruct_t12 hipLaunchKernelStruct_,
-                    bool* result_d12) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  // set the result to true if the condition met
-  result_d12[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
-                    && (hipLaunchKernelStruct_.b.i == 0));
-}
-
-// Passing struct with simple __device__ func(), to a hipLaunchKernelGGL()
-__global__ void hipLaunchKernelStructFunc13(
-                    hipLaunchKernelStruct_t13 hipLaunchKernelStruct_,
-                    bool* result_d13) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  // set the result to true if the condition met
-  result_d13[x] =  ((hipLaunchKernelStruct_.i1 == 1)
-                    && (hipLaunchKernelStruct_.getvalue() == 1));
-}
-
-// Passing struct with array variable, write to from device
-__global__ void hipLaunchKernelStructFunc14(
-                    hipLaunchKernelStruct_t14 hipLaunchKernelStruct_,
-                    bool* result_d14) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  hipLaunchKernelStruct_.writeint[x] = 1;
-  // set the result to true if the condition met
-  result_d14[x] =  ((hipLaunchKernelStruct_.readint == 1)
-                    && (hipLaunchKernelStruct_.writeint[x] == 1));
-}
-
-// Passing struct with struct with dynamic memory, new int
-// the heap memory will be accessed from device
-__global__ void hipLaunchKernelStructFunc15(
-                    hipLaunchKernelStruct_t15 hipLaunchKernelStruct_,
-                    bool* result_d15) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  // set the result to true if the condition met
-  result_d15[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
-                     && (hipLaunchKernelStruct_.heapmem[x] == 1));
-}
-
-// Passing simple template struct
-__global__ void hipLaunchKernelStructFunc16(
-                    hipLaunchKernelStruct_t16<char> hipLaunchKernelStruct_,
-                    bool* result_d16) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  // set the result to true if the condition met
-  result_d16[x] =  (hipLaunchKernelStruct_.t1 == 'c');
-}
-
-// Passing simple explicit template struct
-__global__ void hipLaunchKernelStructFunc17(
-                    hipLaunchKernelStruct_t17<int> hipLaunchKernelStruct_,
-                    bool* result_d17) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  // set the result to true if the condition met
-  result_d17[x] =  (hipLaunchKernelStruct_.t1 == 1);
-}
-
-// Passing struct and write to struct memory using __device__ func()
-__global__ void hipLaunchKernelStructFunc18(
-                    hipLaunchKernelStruct_t18 hipLaunchKernelStruct_,
-                    bool* result_d18) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  hipLaunchKernelStruct_.setChar('c');
-  // set the result to true if the condition met
-  result_d18[x] =  (hipLaunchKernelStruct_.getChar() == 'c');
-}
-
-// Passing simple user defined stack implemenration,  using __device__ func()
-__global__ void hipLaunchKernelStructFunc19(
-                    hipLaunchKernelStruct_t19 hipLaunchKernelStruct_) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  // stack should be empty after the kernel execustion, verify on host side
-  hipLaunchKernelStruct_.popMe();
-}
-
-// Passing out of order initalized struct, access in-order
-__global__ void hipLaunchKernelStructFunc20(
-                    hipLaunchKernelStruct_t20 hipLaunchKernelStruct_,
-                    bool* result_d20) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  // accessing struct members in order
-  result_d20[x] = (hipLaunchKernelStruct_.name == 'A'
-  // strcmp(hipLaunchKernelStruct_.name, "AMD") -> strcmp is not broken
-                   && hipLaunchKernelStruct_.age == 42
-                   && hipLaunchKernelStruct_.rank == 2);
-}
-
-// Passing struct with bit fields
-__global__ void hipLaunchKernelStructFunc21(
-                    hipLaunchKernelStruct_t21 hipLaunchKernelStruct_,
-                    bool* result_d21) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  // accessing struct members in order
-  result_d21[x] = (hipLaunchKernelStruct_.i == 2
-                   && hipLaunchKernelStruct_.j == 0
-                   && (sizeof(hipLaunchKernelStruct_) == 1));
-}
-
-__global__ void vAdd(float* a) {}
-
-template<class T1, class T2>
-__global__ void myKernel(T1 a, T2 b) {}
-
-
-//---
-// Some wrapper macro for testing:
-#define WRAP(...) __VA_ARGS__
-
-#define MY_LAUNCH_MACRO(cmd, elapsed, quiet)                         \
-    do {                                                            \
-        HIP_CHECK(hipDeviceSynchronize());                                     \
-        cmd;                                                        \
-        HIP_CHECK(hipDeviceSynchronize());                                     \
-    } while (0);
-
-
-#define MY_LAUNCH(command, doTrace, msg)                            \
-    {                                                               \
-        if (doTrace) printf("TRACE: %s %s\n", msg, #command);       \
-        command;                                                    \
-    }
-
-
-#define MY_LAUNCH_WITH_PAREN(command, doTrace, msg)                 \
-    {                                                               \
-        if (doTrace) printf("TRACE: %s %s\n", msg, #command);       \
-        (command);                                                  \
-    }
-
-/**
-* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
-* @{
-* @ingroup KernelTest
-* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
-   std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
-* Method to invocate kernel functions
-*/
-
-/**
- * Test Description
- * ------------------------
- *    - Passing struct to a hipLaunchKernelGGL(),
- * read and write into the same struct
- *    - Test to verify by Passing Struct type, checks padding
- *    - Test to verify by Passing Struct type, checks padding, assigning integer to a char
- *    - Test to verify by Passing empty struct
- *    - Test to verify by Passing struct with pointer object to a hipLaunchKernelGGL()
- *    - Test to verify by Passing struct with aligned(8)
- *    - Test to verify by Passing struct with aligned(16)
- *    - Test to verify by Passing struct with packed aligned to 4Bytes
- *    - Test to verify by Passing struct with packed to 4Bytes
- *    - Test to verify by Passing struct with aligned(4) to 4Bytes, size is 8Bytes
- *    - Test to verify by Passing const struct object to a hipLaunchKernelGGL()
- *    - Test to verify by Passing struct with uintN_t as member variables
- *    - Test to verify by Passing struct with uintN_t as member variables
- *    - Test to verify by Passing struct with simple class object
- *    - Test to verify by Passing struct with simple __device__ func()
- *    - Test to verify by Passing struct with array variable, write to from device
- *    - Test to verify by Passing simple template struct
- *    - Test to verify by Passing simple explicit template struct
- *    - Test to verify by Passing struct with simple __device__ func() to struct memory
- *    - Test to verify by Passing struct which is initiazed out of order
- * accessing same elements in order from device
- *    - Test to verify by Passing struct with bit fields operation
- * accessing same elements in order from device
- *    - Test to verify by Passing the different hipLaunchParm options
-
- * Test source
- * ------------------------
- *    - catch/unit/kernel/hipLaunchParm.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.5
- */
-
-TEST_CASE("Unit_hipLaunchParm") {
-  hipMallocError = hipMalloc(reinterpret_cast<void**>(&result_d),
-                             BLOCK_DIM_SIZE*sizeof(bool));
-  hipHostMallocError = hipHostMalloc(reinterpret_cast<void**>(&result_h),
-                                     BLOCK_DIM_SIZE*sizeof(bool));
-  hipMemsetError = hipMemset(result_d, false, BLOCK_DIM_SIZE);
-
-  // Validating memory & initial value, for result_d, result_h
-  REQUIRE(hipMallocError == hipSuccess);
-  REQUIRE(hipHostMallocError == hipSuccess);
-  REQUIRE(hipMemsetError == hipSuccess);
-
-  SECTION("check access from device") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t1 hipLaunchKernelStruct_h1;
-    hipLaunchKernelStruct_h1.li = 1;
-    hipLaunchKernelStruct_h1.lf = 1.0;
-    hipLaunchKernelStruct_h1.result = false;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc1),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h1,
-                    result_d);
-    ResultValidation();
-  }
-
-  SECTION("check padding") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t2 hipLaunchKernelStruct_h2;
-    hipLaunchKernelStruct_h2.c1 = 'a';
-    hipLaunchKernelStruct_h2.l1 = 1.0;
-    hipLaunchKernelStruct_h2.c2 = 'b';
-    hipLaunchKernelStruct_h2.l2 = 2.0;
-    hipLaunchKernelStruct_h2.result = false;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc2),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h2,
-                    result_d);
-    ResultValidation();
-  }
-
-  SECTION("check padding assigning int to char") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t3 hipLaunchKernelStruct_h3;
-    hipLaunchKernelStruct_h3.bf1 = 1;
-    hipLaunchKernelStruct_h3.bf2 = 1;
-    hipLaunchKernelStruct_h3.l1 = 1.0;
-    hipLaunchKernelStruct_h3.bf3 = 1;
-    hipLaunchKernelStruct_h3.result = false;
-                // initialize to false, will be set to
-                // true if the struct size is 1Byte, from device size
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc3),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h3,
-                    result_d);
-    ResultValidation();
-  }
-
-  SECTION("Empty struct") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t4 hipLaunchKernelStruct_h4;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc4),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h4,
-                    result_d);
-    ResultValidation();
-  }
-
-  SECTION("Passing struct with pointer object") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t5 hipLaunchKernelStruct_h5;
-    char* cp_d5;  // This is passed as pointer to struct member
-    // allocating memory for char pointer on device
-    HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&cp_d5), sizeof(char)));
-    HIP_CHECK(hipMemset(cp_d5, 'p', sizeof(char)));
-    hipLaunchKernelStruct_h5.c1 = 'c';
-    hipLaunchKernelStruct_h5.cp = cp_d5;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc5),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h5,
-                    result_d);
-    ResultValidation();
-  }
-
-  SECTION("Passing struct with aligned(8)") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t6 hipLaunchKernelStruct_h6;
-    hipLaunchKernelStruct_h6.c1 = 'c';
-    hipLaunchKernelStruct_h6.si = 1;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc6),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h6,
-                    result_d);
-    // alignment is broken hence disabled the validation part
-    #if ENABLE_ALIGNMENT_TEST_SMALL_BAR
-    ResultValidation();
-    #endif
-  }
-
-  SECTION("Passing struct with aligned(16)") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t7 hipLaunchKernelStruct_h7;
-    hipLaunchKernelStruct_h7.c1 = 'c';
-    hipLaunchKernelStruct_h7.si = 1;
-    #if ENABLE_ALIGNMENT_TEST_SMALL_BAR  // This is broken on small bar
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc7),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h7,
-                    result_d);
-    ResultValidation();
-    #endif
-  }
-
-  SECTION("Passing struct with packed aligned to 4bytes") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t8 hipLaunchKernelStruct_h8;
-    hipLaunchKernelStruct_h8.c1 = 'c';
-    hipLaunchKernelStruct_h8.si = 1;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc8),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h8,
-                    result_d);
-    // packed member broken on large and small bar setup.
-    #if ENABLE_PACKED_TEST
-    ResultValidation();
-    #endif
-  }
-
-  SECTION("Passing struct with packed to 4Bytes") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t8A hipLaunchKernelStruct_h8A;
-    hipLaunchKernelStruct_h8A.c1 = 'c';
-    hipLaunchKernelStruct_h8A.si = 1;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc8A),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h8A,
-                    result_d);
-    // packed member broken on large and small bar setup.
-    #if ENABLE_PACKED_TEST
-    ResultValidation();
-    #endif
-  }
-
-  SECTION("Passing struct with aligned(4) to 4Bytes") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t8B hipLaunchKernelStruct_h8B;
-    hipLaunchKernelStruct_h8B.c1 = 'c';
-    hipLaunchKernelStruct_h8B.si = 1;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc8B),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h8B,
-                    result_d);
-    // alignment is broken hence disabled the validation part
-    #if ENABLE_ALIGNMENT_TEST_SMALL_BAR
-    ResultValidation();
-    #endif
-  }
-
-  SECTION("Passing const struct object") {
-    ResetValidationMem();
-    uint32_t* ip_d9;
-    // allocating memory for char pointer on device
-    HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&ip_d9), sizeof(uint32_t)));
-    HIP_CHECK(hipMemset(ip_d9, 1, sizeof(uint32_t)));
-    // ip_d9 passed as pointer to struct member, struct.ip = &ip_d9
-    const hipLaunchKernelStruct_t9 hipLaunchKernelStruct_h9 = {'c', ip_d9};
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc9),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h9,
-                    result_d);
-    #if ENABLE_DECLARE_INITIALIZATION_POINTER
-    ResultValidation();
-    #endif
-  }
-
-  SECTION("Passing struct with uintN_t") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t10 hipLaunchKernelStruct_h10;
-    hipLaunchKernelStruct_h10.u64 = UINT64_MAX;
-    hipLaunchKernelStruct_h10.u32 = 1;
-    hipLaunchKernelStruct_h10.u8 = UINT8_MAX;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc10),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h10,
-                    result_d);
-    ResultValidation();
-  }
-
-  SECTION("hipLaunchKernelStructFunc11") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t11 hipLaunchKernelStruct_h11;
-    hipLaunchKernelStruct_h11.i1 = 1;
-    hipLaunchKernelStruct_h11.vint = 0;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc11),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h11,
-                    result_d);
-    ResultValidation();
-  }
-
-  SECTION("Passing struct with simple class object") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t12 hipLaunchKernelStruct_h12;
-    hipLaunchKernelStruct_h12.c1 = 'c';
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc12),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h12,
-                    result_d);
-    #if ENABLE_CLASS_OBJ_ACCESS  // access class obj from device broken
-    // Validation part of the struct, hipLaunchKernelStructFunc12
-    ResultValidation();
-    #endif
-  }
-
-  SECTION("Passing struct with simple __device__ func()") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t13 hipLaunchKernelStruct_h13;
-    hipLaunchKernelStruct_h13.i1 = 1;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc13),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h13,
-                    result_d);
-    ResultValidation();
-  }
-
-  SECTION("Passing struct with array variable") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t14 hipLaunchKernelStruct_h14;
-    hipLaunchKernelStruct_h14.readint = 1;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc14),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h14,
-                    result_d);
-    ResultValidation();
-  }
-
-  SECTION("Passing struct with heap memory") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t15 hipLaunchKernelStruct_h15;
-    hipLaunchKernelStruct_h15.c1 = 'c';
-
-    #if ENABLE_HEAP_MEMORY_ACCESS  // causing page fault here,
-                                   // on small bar set
-    HIP_CHECK(hipMalloc(&hipLaunchKernelStruct_h15.heapmem,
-                       BLOCK_DIM_SIZE*sizeof(int)));
-    HIP_CHECK(hipMemset(&hipLaunchKernelStruct_h15.heapmem,
-                       0, BLOCK_DIM_SIZE));
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc15),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h15,
-                    result_d);
-    ResultValidation();
-    #endif
-  }
-
-  SECTION("Passing simple template struct") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t16<char> hipLaunchKernelStruct_h16;
-    hipLaunchKernelStruct_h16.t1 = 'c';
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc16),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h16,
-                    result_d);
-    ResultValidation();
-  }
-
-  SECTION("Passing simple explicit template struct") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t17<int> hipLaunchKernelStruct_h17;
-    hipLaunchKernelStruct_h17.t1 = 1;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc17),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h17,
-                    result_d);
-    ResultValidation();
-  }
-
-  SECTION("Passing struct with simple __device__ func()") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t18 hipLaunchKernelStruct_h18;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc18),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h18,
-                    result_d);
-    ResultValidation();
-  }
-
-  SECTION("Passing user defined stack") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t19 hipLaunchKernelStruct_h19;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc19),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h19);
-    #if ENABLE_USER_STL
-    // Validation part of the struct, hipLaunchKernelStructFunc19
-    HIPASSERT(hipLaunchKernelStruct_h19.stackSize() == 0);
-    #endif
-  }
-
-    // Test: Passing struct which is initiazed out of order
-    // accessing same elements in order from device
-  SECTION("Passing struct which is initiazed out of order") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t20 hipLaunchKernelStruct_h20;
-    hipLaunchKernelStruct_h20.name = 'A';
-    hipLaunchKernelStruct_h20.rank = 2;
-    hipLaunchKernelStruct_h20.age = 42;
-    bool *result_d20, *result_h20;
-    #if ENABLE_OUT_OF_ORDER_INITIALIZATION
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc20),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h20, result_d);
-    ResultValidation();
-    #endif
-  }
-
-  SECTION("Passing struct with bit fields operation") {
-    ResetValidationMem();
-    hipLaunchKernelStruct_t21 hipLaunchKernelStruct_h21 =
-    // out of order initalization
-                     {2, 0};
-    bool *result_d21, *result_h21;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc21),
-                    dim3(BLOCK_DIM_SIZE),
-                    dim3(1), 0, 0, hipLaunchKernelStruct_h21, result_d);
-    #if ENABLE_BIT_FIELDS
-    ResultValidation();
-    #endif
-  }
-
-  SECTION("Passing the different hipLaunchParm options") {
-    float* Ad;
-    HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), 1024));
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), size_t(1024), 1, 0, 0, Ad);
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), 1024, dim3(1), 0, 0, Ad);
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), dim3(1024), 1, 0, 0, Ad);
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), dim3(1024), dim3(1), 0, 0, Ad);
-
-    // Test: Passing macro to hipLaunchKernelGGL
-#define KERNEL_CONFIG  dim3(1024), dim3(1), 0, 0
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), KERNEL_CONFIG, Ad);
-
-    // Test: Same thing with templates:
-    int a;
-    float b;
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(myKernel<int, float>),
-                       KERNEL_CONFIG, a, b);
-
-#define TYPE_PARAM_CONFIG int, float
-    hipLaunchKernelGGL(HIP_KERNEL_NAME(myKernel<TYPE_PARAM_CONFIG>),
-                       KERNEL_CONFIG, a, b);
-
-    // Test: Passing hipLaunchKernelGGL inside another macro:
-    float e0;
-    MY_LAUNCH_MACRO(hipLaunchKernelGGL(vAdd, dim3(1024),
-                   dim3(1), 0, 0, Ad), e0, j);
-    MY_LAUNCH_MACRO(WRAP(hipLaunchKernelGGL(vAdd, dim3(1024),
-                   dim3(1), 0, 0, Ad)), e0, j);
-
-#ifdef EXTRA_PARENS_1
-    // Don't wrap hipLaunchKernelGGL in extra set of parens:
-    MY_LAUNCH_MACRO((hipLaunchKernelGGL(vAdd, dim3(1024),
-                    dim3(1), 0, 0, Ad)), e0, j);
-#endif
-
-    MY_LAUNCH(hipLaunchKernelGGL(vAdd, dim3(1024), dim3(1),
-              0, 0, Ad), true, "firstCall");
-    float* A;
-    float e1;
-    MY_LAUNCH_WITH_PAREN(hipMalloc(&A, 100), true, "launch2");
-
-#ifdef EXTRA_PARENS_2
-    // MY_LAUNCH_WITH_PAREN wraps cmd in () which can cause issues.
-    MY_LAUNCH_WITH_PAREN(hipLaunchKernelGGL(vAdd, dim3(1024),
-                         dim3(1), 0, 0, Ad), true, "firstCall");
-#endif
-  }
-  HIP_CHECK(hipHostFree(result_h));
-  HIP_CHECK(hipFree(result_d));
-}
-
-/**
-* End doxygen group KernelTest.
-* @}
-*/
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+ 
+#include <cstdint>
+
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#pragma clang diagnostic ignored "-Wunused-result"
+#pragma clang diagnostic ignored "-Wuninitialized"
+
+// Memory alignment is broken
+// Update: with latest changes the aligment is working fine, hence enabled
+#define ENABLE_ALIGNMENT_TEST_SMALL_BAR 1
+
+// Packed member atribute broken
+#define ENABLE_PACKED_TEST 0
+
+// Update: with latest changes struct class object
+// from device is working fine, hence enabled
+#define ENABLE_CLASS_OBJ_ACCESS 1
+
+// accessing dynamic/heap memory from device is broken
+#define ENABLE_HEAP_MEMORY_ACCESS 0
+
+// Update: with latest changes it's working hence enabled
+#define ENABLE_USER_STL 1
+
+// Update: with latest changes it's working hence enabled
+#define ENABLE_OUT_OF_ORDER_INITIALIZATION 1
+
+// Direct initialization of struct broken,
+// ip_d9 is a pointer, uint_t*, hipLaunchKernelStruct_h9 = {'c', ip_d9};
+#define ENABLE_DECLARE_INITIALIZATION_POINTER 0
+
+// Bit fields are broken
+#define ENABLE_BIT_FIELDS 0
+
+static const int  BLOCK_DIM_SIZE = 512;
+
+// allocate memory on device and host for result validation
+static bool *result_d, *result_h;
+
+static hipError_t hipMallocError = hipErrorUnknown;
+static hipError_t hipHostMallocError = hipErrorUnknown;
+static hipError_t hipMemsetError = hipErrorUnknown;
+
+static void ResultValidation() {
+  HIP_CHECK(hipMemcpy(result_h, result_d, BLOCK_DIM_SIZE*sizeof(bool),
+            hipMemcpyDeviceToHost));
+
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
+    REQUIRE(result_h[k] == true);
+  }
+  return;
+}
+
+// Segregating the reset part as it was causing a problem when i put inside
+// ResultValidation() function, the memory was not reset correctly for the
+// tests which were disabled.
+static void ResetValidationMem() {
+  // reset the memory to false to reuse it.
+  HIP_CHECK(hipMemset(result_d, false, BLOCK_DIM_SIZE));
+  HIP_CHECK(hipMemset(result_h, false, BLOCK_DIM_SIZE));
+  return;
+}
+
+// This test is to verify Struct with variables
+// support, read from device.
+typedef struct hipLaunchKernelStruct1 {
+  int li;  // local int
+  float lf;  // local float
+  bool result;  // local bool
+} hipLaunchKernelStruct_t1;
+
+// This test is to verify struct with padding, read from device
+typedef struct hipLaunchKernelStruct2 {
+  char c1;
+  int64_t l1;
+  char c2;
+  int64_t l2;
+  bool result;
+} hipLaunchKernelStruct_t2;
+
+// This test is to verify struct with padding, read from device
+typedef struct hipLaunchKernelStruct3 {
+  char bf1;
+  char bf2;
+  int64_t l1;
+  char bf3;
+  bool result;
+} hipLaunchKernelStruct_t3;
+
+// This test is to verify empty struct
+typedef struct hipLaunchKernelStruct4 {
+  // empty struct, size will be verified from device side,size 1Byte
+} hipLaunchKernelStruct_t4;
+
+// This test is to verify struct with pointer member variable.
+typedef struct hipLaunchKernelStruct5 {
+  char c1;
+  char* cp;  // char pointer
+} hipLaunchKernelStruct_t5;
+
+
+// This test is to verify struct with aligned(8),
+// right now it's broken on hcc & hip-clang
+typedef struct hipLaunchKernelStruct6 {
+  char c1;
+  int16_t si;
+} __attribute__((aligned(8)))  hipLaunchKernelStruct_t6;
+
+// This test is to verify struct with aligned(16),
+// right now it's brokenon hcc & hip-clang
+typedef struct hipLaunchKernelStruct7 {
+  char c1;
+  int16_t si;
+} __attribute__((aligned(16)))  hipLaunchKernelStruct_t7;
+
+// This test is to verify struct with packed & aligned,
+// size should be 4Bytes right now it's broken on hcc & hip-clang
+typedef struct hipLaunchKernelStruct8 {
+  char c1;
+  int16_t si;
+  bool b;
+}__attribute__((packed, aligned(4)))  hipLaunchKernelStruct_t8;
+
+// This test is to verify struct with packed, no alignment as Sam suggested
+// size should be 4Bytes, right now it's broken on hcc & hip-clang
+typedef struct hipLaunchKernelStruct8A {
+  char c1;
+  int16_t si;
+  bool b;
+}__attribute__((packed))  hipLaunchKernelStruct_t8A;
+
+// This test is to verify struct with alignment, no packing as Sam suggested
+// size should be 8Bytes as no packing, right now it's broken on hcc & hip-clang
+typedef struct hipLaunchKernelStruct8B {
+  char c1;
+  int16_t si;
+  bool b;
+}__attribute__((aligned(8)))  hipLaunchKernelStruct_t8B;
+
+// This test is to verify const struct object
+typedef struct hipLaunchKernelStruct9 {
+  char c1;
+  uint32_t* ip;  // uint pointer
+} hipLaunchKernelStruct_t9;
+
+// This test is to verify struct with stdint types, uintN_t
+typedef struct hipLaunchKernelStruct10 {
+  uint64_t u64;
+  uint32_t u32;
+  uint8_t u8;
+} hipLaunchKernelStruct_t10;
+
+// This test is to verify struct with volatile member
+typedef struct hipLaunchKernelStruct11 {
+  int i1;
+  volatile unsigned int vint;
+} hipLaunchKernelStruct_t11;
+
+// This test is to verify struct with simple class object
+class base {
+ public:
+    int i = 0;
+    base() {}
+};
+typedef struct hipLaunchKernelStruct12 {
+  base b;
+  char c1;
+} hipLaunchKernelStruct_t12;
+
+// This test is to verify struct with __device__ func() attribute
+typedef struct hipLaunchKernelStruct13 {
+  int i1;
+  __device__ int getvalue() { return i1; }
+} hipLaunchKernelStruct_t13;
+
+// This test is to verify struct with array variable,
+// write to from device
+typedef struct hipLaunchKernelStruct14 {
+  int readint;
+  int writeint[BLOCK_DIM_SIZE];  // will write to this from device
+} hipLaunchKernelStruct_t14;
+
+// This test is to verify struct with dynamic memory, new int
+// the heap memory will be accessed from device
+typedef struct hipLaunchKernelStruct15 {
+  char c1;
+  int* heapmem;  // allocated using hipMalloc()
+} hipLaunchKernelStruct_t15;
+
+// This test is to verify simple template struct
+template<typename T>
+struct hipLaunchKernelStruct_t16 {
+  T t1;
+};
+
+// This test is to verify simple explicity template struct
+template<typename T> struct hipLaunchKernelStruct_t17 {};
+template<>  // explicit template
+struct hipLaunchKernelStruct_t17<int> {
+  int t1;
+};
+
+// This test is to verity write to struct memory using __device__ func()
+typedef struct hipLaunchKernelStruct18 {
+  char c1;
+  __device__ void setChar(char c) { c1 = c; }
+  __device__ int getChar() { return c1; }
+} hipLaunchKernelStruct_t18;
+
+// This test is to verity user defined STL, simple stack implementation
+typedef struct stackNode {
+    int data;
+    stackNode* nextNode = NULL;
+} stackNode_t;
+typedef struct hipLaunchKernelStruct19 {
+  stackNode_t* stack = NULL;
+  unsigned int size_ = 0;
+  void pushMe(int value) {  // not a device function, setting from host
+    stackNode_t* newNode;
+    HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&newNode),
+                         sizeof(stackNode_t)));
+    HIP_CHECK(hipMemset(&newNode->data, value, sizeof(stackNode_t)));
+    // newNode->data = value;
+    ++size_;
+    if (stack == NULL) {
+      stack = newNode;
+      return;
+    }
+    stackNode_t* currentHead = stack;
+    stack = newNode;
+    stack->nextNode = currentHead;
+    return;
+  }
+  __device__ void popMe() {
+    stackNode_t* currentHead = stack;
+    stack = stack->nextNode;
+    --size_;
+    // delete currentHead;  // no idea why delete not working
+    return;
+  }
+  int stackSize() {
+    return size_;
+  }
+} hipLaunchKernelStruct_t19;
+
+// This test is to verify out of order initalizer of struct elements
+// and access in-order, from device.
+typedef struct hipLaunchKernelStruct20 {
+  char name;
+  int age;
+  int rank;
+} hipLaunchKernelStruct_t20;
+
+// This test is to verify bit fields operations
+// the size should be 1Bytes
+typedef struct hipLaunchKernelStruct21 {
+  int i : 3;  // limiting bits to 3
+  int j : 2;  // limiting bits to 2
+} hipLaunchKernelStruct_t21;
+
+// Passing struct to a hipLaunchKernelGGL(),
+// read and write into the same struct
+__global__ void hipLaunchKernelStructFunc1(
+                    hipLaunchKernelStruct_t1 hipLaunchKernelStruct_,
+                    bool* result_d1) {
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+
+    // set the result to true if the condition met
+    result_d1[x] =  ((hipLaunchKernelStruct_.li == 1)
+                      && (hipLaunchKernelStruct_.lf == 1.0)
+                      && (hipLaunchKernelStruct_.result == false));
+}
+
+// Passing struct to a hipLaunchKernelGGL(), checks padding,
+// read and write into the same struct
+__global__ void hipLaunchKernelStructFunc2(
+                    hipLaunchKernelStruct_t2 hipLaunchKernelStruct_,
+                    bool* result_d2) {
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+
+    // set the result to true if the condition met
+    result_d2[x] =  ((hipLaunchKernelStruct_.c1 == 'a')
+                      && (hipLaunchKernelStruct_.l1 == 1.0)
+                      && (hipLaunchKernelStruct_.c2 == 'b')
+                      && (hipLaunchKernelStruct_.l2 == 2.0) );
+}
+
+// Passing struct to a hipLaunchKernelGGL(), checks padding,
+// read and write into the same struct
+__global__ void hipLaunchKernelStructFunc3(
+                    hipLaunchKernelStruct_t3 hipLaunchKernelStruct_,
+                    bool* result_d3) {
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+
+    // set the result to true if the condition met
+    result_d3[x] =  ((hipLaunchKernelStruct_.bf1 == 1)
+                     && (hipLaunchKernelStruct_.bf2 == 1)
+                     && (hipLaunchKernelStruct_.l1 == 1.0)
+                     && (hipLaunchKernelStruct_.bf3 == 1) );
+}
+
+// Passing empty struct to a hipLaunchKernelGGL(),
+// check the size of 1Byte, set  result_d4 to true if condition met
+__global__ void hipLaunchKernelStructFunc4(
+                    hipLaunchKernelStruct_t4 hipLaunchKernelStruct_,
+                    bool* result_d4) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+
+  // set the result to true if the condition met
+  result_d4[x] =  (sizeof(hipLaunchKernelStruct_) == 1);
+}
+
+// Passing struct with pointer object to a hipLaunchKernelGGL()
+__global__ void hipLaunchKernelStructFunc5(
+                    hipLaunchKernelStruct_t5 hipLaunchKernelStruct_,
+                    bool* result_d5) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+
+  // set the result to true if the condition met
+  result_d5[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
+                    && (*hipLaunchKernelStruct_.cp == 'p'));
+}
+
+// Passing struct which is aligned to 8Byte to a hipLaunchKernelGGL(),
+// set the result_d6 to true if condition met
+__global__ void hipLaunchKernelStructFunc6(
+                    hipLaunchKernelStruct_t6 hipLaunchKernelStruct_,
+                    bool* result_d6) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+
+  // set the result to true if the condition met
+  // get the address of the struct
+  // size_t(p)%8 will be 0 if aligned to 8Byte address space
+  int *p = reinterpret_cast<int*>(&hipLaunchKernelStruct_);
+  result_d6[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
+                    && (hipLaunchKernelStruct_.si == 1)
+                    && ((size_t(p))%8 ==0));
+}
+
+// Passing struct which is aligned to 16Byte,
+// set the result_d7 to true if condition met
+__global__ void hipLaunchKernelStructFunc7(
+                    hipLaunchKernelStruct_t7 hipLaunchKernelStruct_,
+                    bool* result_d7) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+
+  // set the result to true if the condition met
+  // get the address of the struct
+  // size_t(p)%16 will be 0 if aligned to 16Byte address space
+  int *p = reinterpret_cast<int*>(&hipLaunchKernelStruct_);
+  result_d7[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
+                    && (hipLaunchKernelStruct_.si == 1)
+                    && ((size_t(p))%16 ==0) );
+}
+
+// Passing struct which is packed & aligned to 4Byte,
+// set the result_d8 to true if condition met
+__global__ void hipLaunchKernelStructFunc8(
+                    hipLaunchKernelStruct_t8 hipLaunchKernelStruct_,
+                    bool* result_d8) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  // set the result to true if the condition met
+  // get the address of the xth element, struct[x],
+  // size_t(p)%4 will be 0 if aligned to 4Byte address space
+  int *p = reinterpret_cast<int*>(&hipLaunchKernelStruct_);
+  result_d8[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
+                    && (hipLaunchKernelStruct_.si == 1)
+                    && ((size_t(p))%4 ==0)
+                    && (sizeof(hipLaunchKernelStruct_) == 4));
+}
+
+// Passing struct which is packed only, as Sam suggested, should be 4Bytes
+// set the result_d8A to true if condition met
+__global__ void hipLaunchKernelStructFunc8A(
+                    hipLaunchKernelStruct_t8A hipLaunchKernelStruct_,
+                    bool* result_d8A) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  // set the result to true if the condition met
+  // this is packed struct
+  // the address will not be aglined in this case hence condition removed
+  // only sizeof(hipLaunchKernelStruct_) will be valided
+  result_d8A[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
+                    && (hipLaunchKernelStruct_.si == 1)
+                    && (sizeof(hipLaunchKernelStruct_) == 4));
+}
+
+// Passing struct which is aligned(4) only, as Sam suggested
+// , size should be 8Bytes, set the result_d8B to true if condition met
+__global__ void hipLaunchKernelStructFunc8B(
+                    hipLaunchKernelStruct_t8B hipLaunchKernelStruct_,
+                    bool* result_d8B) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  // set the result to true if the condition met
+  // get the address of the xth element, struct[x],
+  // size_t(p)%4 will be 0 if aligned to 4Byte address space
+  int *p = reinterpret_cast<int*>(&hipLaunchKernelStruct_);
+  result_d8B[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
+                    && (hipLaunchKernelStruct_.si == 1)
+                    && ((size_t(p))%8 == 0)
+                    && (sizeof(hipLaunchKernelStruct_) == 8));
+}
+
+// Passing struct with uint pointer object to a hipLaunchKernelGGL()
+__global__ void hipLaunchKernelStructFunc9(
+                    const hipLaunchKernelStruct_t9 hipLaunchKernelStruct_,
+                    bool* result_d9) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+
+  // set the result to true if the condition met
+  result_d9[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
+                    && (*hipLaunchKernelStruct_.ip == 1));
+}
+
+// Passing struct with stdint types object, uintN_t, to a hipLaunchKernelGGL()
+__global__ void hipLaunchKernelStructFunc10(
+                    hipLaunchKernelStruct_t10 hipLaunchKernelStruct_,
+                    bool* result_d10) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  // set the result to true if the condition met
+  result_d10[x] =  ((hipLaunchKernelStruct_.u64 == UINT64_MAX)
+                    && (hipLaunchKernelStruct_.u32 == 1)
+                    && (hipLaunchKernelStruct_.u8 == UINT8_MAX));
+}
+
+// Passing struct with volatile member, to a hipLaunchKernelGGL()
+__global__ void hipLaunchKernelStructFunc11(
+                    hipLaunchKernelStruct_t11 hipLaunchKernelStruct_,
+                    bool* result_d11) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  // set the result to true if the condition met
+  result_d11[x] =  ((hipLaunchKernelStruct_.i1 == 1)
+                    && (hipLaunchKernelStruct_.vint == 0));
+}
+
+// Passing struct with simple class obj, to a hipLaunchKernelGGL()
+__global__ void hipLaunchKernelStructFunc12(
+                    hipLaunchKernelStruct_t12 hipLaunchKernelStruct_,
+                    bool* result_d12) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  // set the result to true if the condition met
+  result_d12[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
+                    && (hipLaunchKernelStruct_.b.i == 0));
+}
+
+// Passing struct with simple __device__ func(), to a hipLaunchKernelGGL()
+__global__ void hipLaunchKernelStructFunc13(
+                    hipLaunchKernelStruct_t13 hipLaunchKernelStruct_,
+                    bool* result_d13) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  // set the result to true if the condition met
+  result_d13[x] =  ((hipLaunchKernelStruct_.i1 == 1)
+                    && (hipLaunchKernelStruct_.getvalue() == 1));
+}
+
+// Passing struct with array variable, write to from device
+__global__ void hipLaunchKernelStructFunc14(
+                    hipLaunchKernelStruct_t14 hipLaunchKernelStruct_,
+                    bool* result_d14) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  hipLaunchKernelStruct_.writeint[x] = 1;
+  // set the result to true if the condition met
+  result_d14[x] =  ((hipLaunchKernelStruct_.readint == 1)
+                    && (hipLaunchKernelStruct_.writeint[x] == 1));
+}
+
+// Passing struct with struct with dynamic memory, new int
+// the heap memory will be accessed from device
+__global__ void hipLaunchKernelStructFunc15(
+                    hipLaunchKernelStruct_t15 hipLaunchKernelStruct_,
+                    bool* result_d15) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  // set the result to true if the condition met
+  result_d15[x] =  ((hipLaunchKernelStruct_.c1 == 'c')
+                     && (hipLaunchKernelStruct_.heapmem[x] == 1));
+}
+
+// Passing simple template struct
+__global__ void hipLaunchKernelStructFunc16(
+                    hipLaunchKernelStruct_t16<char> hipLaunchKernelStruct_,
+                    bool* result_d16) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  // set the result to true if the condition met
+  result_d16[x] =  (hipLaunchKernelStruct_.t1 == 'c');
+}
+
+// Passing simple explicit template struct
+__global__ void hipLaunchKernelStructFunc17(
+                    hipLaunchKernelStruct_t17<int> hipLaunchKernelStruct_,
+                    bool* result_d17) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  // set the result to true if the condition met
+  result_d17[x] =  (hipLaunchKernelStruct_.t1 == 1);
+}
+
+// Passing struct and write to struct memory using __device__ func()
+__global__ void hipLaunchKernelStructFunc18(
+                    hipLaunchKernelStruct_t18 hipLaunchKernelStruct_,
+                    bool* result_d18) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  hipLaunchKernelStruct_.setChar('c');
+  // set the result to true if the condition met
+  result_d18[x] =  (hipLaunchKernelStruct_.getChar() == 'c');
+}
+
+// Passing simple user defined stack implemenration,  using __device__ func()
+__global__ void hipLaunchKernelStructFunc19(
+                    hipLaunchKernelStruct_t19 hipLaunchKernelStruct_) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  // stack should be empty after the kernel execustion, verify on host side
+  hipLaunchKernelStruct_.popMe();
+}
+
+// Passing out of order initalized struct, access in-order
+__global__ void hipLaunchKernelStructFunc20(
+                    hipLaunchKernelStruct_t20 hipLaunchKernelStruct_,
+                    bool* result_d20) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  // accessing struct members in order
+  result_d20[x] = (hipLaunchKernelStruct_.name == 'A'
+  // strcmp(hipLaunchKernelStruct_.name, "AMD") -> strcmp is not broken
+                   && hipLaunchKernelStruct_.age == 42
+                   && hipLaunchKernelStruct_.rank == 2);
+}
+
+// Passing struct with bit fields
+__global__ void hipLaunchKernelStructFunc21(
+                    hipLaunchKernelStruct_t21 hipLaunchKernelStruct_,
+                    bool* result_d21) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  // accessing struct members in order
+  result_d21[x] = (hipLaunchKernelStruct_.i == 2
+                   && hipLaunchKernelStruct_.j == 0
+                   && (sizeof(hipLaunchKernelStruct_) == 1));
+}
+
+__global__ void vAdd(float* a) {}
+
+template<class T1, class T2>
+__global__ void myKernel(T1 a, T2 b) {}
+
+
+//---
+// Some wrapper macro for testing:
+#define WRAP(...) __VA_ARGS__
+
+#define MY_LAUNCH_MACRO(cmd, elapsed, quiet)                         \
+    do {                                                            \
+        HIP_CHECK(hipDeviceSynchronize());                                     \
+        cmd;                                                        \
+        HIP_CHECK(hipDeviceSynchronize());                                     \
+    } while (0);
+
+
+#define MY_LAUNCH(command, doTrace, msg)                            \
+    {                                                               \
+        if (doTrace) printf("TRACE: %s %s\n", msg, #command);       \
+        command;                                                    \
+    }
+
+
+#define MY_LAUNCH_WITH_PAREN(command, doTrace, msg)                 \
+    {                                                               \
+        if (doTrace) printf("TRACE: %s %s\n", msg, #command);       \
+        (command);                                                  \
+    }
+
+/**
+* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
+* @{
+* @ingroup KernelTest
+* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
+   std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
+* Method to invocate kernel functions
+*/
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Passing struct to a hipLaunchKernelGGL(),
+ * read and write into the same struct
+ *    - Test to verify by Passing Struct type, checks padding
+ *    - Test to verify by Passing Struct type, checks padding, assigning integer to a char
+ *    - Test to verify by Passing empty struct
+ *    - Test to verify by Passing struct with pointer object to a hipLaunchKernelGGL()
+ *    - Test to verify by Passing struct with aligned(8)
+ *    - Test to verify by Passing struct with aligned(16)
+ *    - Test to verify by Passing struct with packed aligned to 4Bytes
+ *    - Test to verify by Passing struct with packed to 4Bytes
+ *    - Test to verify by Passing struct with aligned(4) to 4Bytes, size is 8Bytes
+ *    - Test to verify by Passing const struct object to a hipLaunchKernelGGL()
+ *    - Test to verify by Passing struct with uintN_t as member variables
+ *    - Test to verify by Passing struct with uintN_t as member variables
+ *    - Test to verify by Passing struct with simple class object
+ *    - Test to verify by Passing struct with simple __device__ func()
+ *    - Test to verify by Passing struct with array variable, write to from device
+ *    - Test to verify by Passing simple template struct
+ *    - Test to verify by Passing simple explicit template struct
+ *    - Test to verify by Passing struct with simple __device__ func() to struct memory
+ *    - Test to verify by Passing struct which is initiazed out of order
+ * accessing same elements in order from device
+ *    - Test to verify by Passing struct with bit fields operation
+ * accessing same elements in order from device
+ *    - Test to verify by Passing the different hipLaunchParm options
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/kernel/hipLaunchParm.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.5
+ */
+
+TEST_CASE("Unit_hipLaunchParm") {
+  hipMallocError = hipMalloc(reinterpret_cast<void**>(&result_d),
+                             BLOCK_DIM_SIZE*sizeof(bool));
+  hipHostMallocError = hipHostMalloc(reinterpret_cast<void**>(&result_h),
+                                     BLOCK_DIM_SIZE*sizeof(bool));
+  hipMemsetError = hipMemset(result_d, false, BLOCK_DIM_SIZE);
+
+  // Validating memory & initial value, for result_d, result_h
+  REQUIRE(hipMallocError == hipSuccess);
+  REQUIRE(hipHostMallocError == hipSuccess);
+  REQUIRE(hipMemsetError == hipSuccess);
+
+  SECTION("check access from device") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t1 hipLaunchKernelStruct_h1;
+    hipLaunchKernelStruct_h1.li = 1;
+    hipLaunchKernelStruct_h1.lf = 1.0;
+    hipLaunchKernelStruct_h1.result = false;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc1),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h1,
+                    result_d);
+    ResultValidation();
+  }
+
+  SECTION("check padding") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t2 hipLaunchKernelStruct_h2;
+    hipLaunchKernelStruct_h2.c1 = 'a';
+    hipLaunchKernelStruct_h2.l1 = 1.0;
+    hipLaunchKernelStruct_h2.c2 = 'b';
+    hipLaunchKernelStruct_h2.l2 = 2.0;
+    hipLaunchKernelStruct_h2.result = false;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc2),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h2,
+                    result_d);
+    ResultValidation();
+  }
+
+  SECTION("check padding assigning int to char") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t3 hipLaunchKernelStruct_h3;
+    hipLaunchKernelStruct_h3.bf1 = 1;
+    hipLaunchKernelStruct_h3.bf2 = 1;
+    hipLaunchKernelStruct_h3.l1 = 1.0;
+    hipLaunchKernelStruct_h3.bf3 = 1;
+    hipLaunchKernelStruct_h3.result = false;
+                // initialize to false, will be set to
+                // true if the struct size is 1Byte, from device size
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc3),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h3,
+                    result_d);
+    ResultValidation();
+  }
+
+  SECTION("Empty struct") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t4 hipLaunchKernelStruct_h4;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc4),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h4,
+                    result_d);
+    ResultValidation();
+  }
+
+  SECTION("Passing struct with pointer object") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t5 hipLaunchKernelStruct_h5;
+    char* cp_d5;  // This is passed as pointer to struct member
+    // allocating memory for char pointer on device
+    HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&cp_d5), sizeof(char)));
+    HIP_CHECK(hipMemset(cp_d5, 'p', sizeof(char)));
+    hipLaunchKernelStruct_h5.c1 = 'c';
+    hipLaunchKernelStruct_h5.cp = cp_d5;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc5),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h5,
+                    result_d);
+    ResultValidation();
+  }
+
+  SECTION("Passing struct with aligned(8)") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t6 hipLaunchKernelStruct_h6;
+    hipLaunchKernelStruct_h6.c1 = 'c';
+    hipLaunchKernelStruct_h6.si = 1;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc6),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h6,
+                    result_d);
+    // alignment is broken hence disabled the validation part
+    #if ENABLE_ALIGNMENT_TEST_SMALL_BAR
+    ResultValidation();
+    #endif
+  }
+
+  SECTION("Passing struct with aligned(16)") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t7 hipLaunchKernelStruct_h7;
+    hipLaunchKernelStruct_h7.c1 = 'c';
+    hipLaunchKernelStruct_h7.si = 1;
+    #if ENABLE_ALIGNMENT_TEST_SMALL_BAR  // This is broken on small bar
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc7),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h7,
+                    result_d);
+    ResultValidation();
+    #endif
+  }
+
+  SECTION("Passing struct with packed aligned to 4bytes") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t8 hipLaunchKernelStruct_h8;
+    hipLaunchKernelStruct_h8.c1 = 'c';
+    hipLaunchKernelStruct_h8.si = 1;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc8),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h8,
+                    result_d);
+    // packed member broken on large and small bar setup.
+    #if ENABLE_PACKED_TEST
+    ResultValidation();
+    #endif
+  }
+
+  SECTION("Passing struct with packed to 4Bytes") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t8A hipLaunchKernelStruct_h8A;
+    hipLaunchKernelStruct_h8A.c1 = 'c';
+    hipLaunchKernelStruct_h8A.si = 1;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc8A),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h8A,
+                    result_d);
+    // packed member broken on large and small bar setup.
+    #if ENABLE_PACKED_TEST
+    ResultValidation();
+    #endif
+  }
+
+  SECTION("Passing struct with aligned(4) to 4Bytes") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t8B hipLaunchKernelStruct_h8B;
+    hipLaunchKernelStruct_h8B.c1 = 'c';
+    hipLaunchKernelStruct_h8B.si = 1;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc8B),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h8B,
+                    result_d);
+    // alignment is broken hence disabled the validation part
+    #if ENABLE_ALIGNMENT_TEST_SMALL_BAR
+    ResultValidation();
+    #endif
+  }
+
+  SECTION("Passing const struct object") {
+    ResetValidationMem();
+    uint32_t* ip_d9;
+    // allocating memory for char pointer on device
+    HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&ip_d9), sizeof(uint32_t)));
+    HIP_CHECK(hipMemset(ip_d9, 1, sizeof(uint32_t)));
+    // ip_d9 passed as pointer to struct member, struct.ip = &ip_d9
+    const hipLaunchKernelStruct_t9 hipLaunchKernelStruct_h9 = {'c', ip_d9};
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc9),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h9,
+                    result_d);
+    #if ENABLE_DECLARE_INITIALIZATION_POINTER
+    ResultValidation();
+    #endif
+  }
+
+  SECTION("Passing struct with uintN_t") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t10 hipLaunchKernelStruct_h10;
+    hipLaunchKernelStruct_h10.u64 = UINT64_MAX;
+    hipLaunchKernelStruct_h10.u32 = 1;
+    hipLaunchKernelStruct_h10.u8 = UINT8_MAX;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc10),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h10,
+                    result_d);
+    ResultValidation();
+  }
+
+  SECTION("hipLaunchKernelStructFunc11") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t11 hipLaunchKernelStruct_h11;
+    hipLaunchKernelStruct_h11.i1 = 1;
+    hipLaunchKernelStruct_h11.vint = 0;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc11),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h11,
+                    result_d);
+    ResultValidation();
+  }
+
+  SECTION("Passing struct with simple class object") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t12 hipLaunchKernelStruct_h12;
+    hipLaunchKernelStruct_h12.c1 = 'c';
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc12),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h12,
+                    result_d);
+    #if ENABLE_CLASS_OBJ_ACCESS  // access class obj from device broken
+    // Validation part of the struct, hipLaunchKernelStructFunc12
+    ResultValidation();
+    #endif
+  }
+
+  SECTION("Passing struct with simple __device__ func()") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t13 hipLaunchKernelStruct_h13;
+    hipLaunchKernelStruct_h13.i1 = 1;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc13),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h13,
+                    result_d);
+    ResultValidation();
+  }
+
+  SECTION("Passing struct with array variable") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t14 hipLaunchKernelStruct_h14;
+    hipLaunchKernelStruct_h14.readint = 1;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc14),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h14,
+                    result_d);
+    ResultValidation();
+  }
+
+  SECTION("Passing struct with heap memory") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t15 hipLaunchKernelStruct_h15;
+    hipLaunchKernelStruct_h15.c1 = 'c';
+
+    #if ENABLE_HEAP_MEMORY_ACCESS  // causing page fault here,
+                                   // on small bar set
+    HIP_CHECK(hipMalloc(&hipLaunchKernelStruct_h15.heapmem,
+                       BLOCK_DIM_SIZE*sizeof(int)));
+    HIP_CHECK(hipMemset(&hipLaunchKernelStruct_h15.heapmem,
+                       0, BLOCK_DIM_SIZE));
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc15),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h15,
+                    result_d);
+    ResultValidation();
+    #endif
+  }
+
+  SECTION("Passing simple template struct") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t16<char> hipLaunchKernelStruct_h16;
+    hipLaunchKernelStruct_h16.t1 = 'c';
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc16),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h16,
+                    result_d);
+    ResultValidation();
+  }
+
+  SECTION("Passing simple explicit template struct") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t17<int> hipLaunchKernelStruct_h17;
+    hipLaunchKernelStruct_h17.t1 = 1;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc17),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h17,
+                    result_d);
+    ResultValidation();
+  }
+
+  SECTION("Passing struct with simple __device__ func()") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t18 hipLaunchKernelStruct_h18;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc18),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h18,
+                    result_d);
+    ResultValidation();
+  }
+
+  SECTION("Passing user defined stack") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t19 hipLaunchKernelStruct_h19;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc19),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h19);
+    #if ENABLE_USER_STL
+    // Validation part of the struct, hipLaunchKernelStructFunc19
+    HIPASSERT(hipLaunchKernelStruct_h19.stackSize() == 0);
+    #endif
+  }
+
+    // Test: Passing struct which is initiazed out of order
+    // accessing same elements in order from device
+  SECTION("Passing struct which is initiazed out of order") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t20 hipLaunchKernelStruct_h20;
+    hipLaunchKernelStruct_h20.name = 'A';
+    hipLaunchKernelStruct_h20.rank = 2;
+    hipLaunchKernelStruct_h20.age = 42;
+    bool *result_d20, *result_h20;
+    #if ENABLE_OUT_OF_ORDER_INITIALIZATION
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc20),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h20, result_d);
+    ResultValidation();
+    #endif
+  }
+
+  SECTION("Passing struct with bit fields operation") {
+    ResetValidationMem();
+    hipLaunchKernelStruct_t21 hipLaunchKernelStruct_h21 =
+    // out of order initalization
+                     {2, 0};
+    bool *result_d21, *result_h21;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(hipLaunchKernelStructFunc21),
+                    dim3(BLOCK_DIM_SIZE),
+                    dim3(1), 0, 0, hipLaunchKernelStruct_h21, result_d);
+    #if ENABLE_BIT_FIELDS
+    ResultValidation();
+    #endif
+  }
+
+  SECTION("Passing the different hipLaunchParm options") {
+    float* Ad;
+    HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&Ad), 1024));
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), size_t(1024), 1, 0, 0, Ad);
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), 1024, dim3(1), 0, 0, Ad);
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), dim3(1024), 1, 0, 0, Ad);
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), dim3(1024), dim3(1), 0, 0, Ad);
+
+    // Test: Passing macro to hipLaunchKernelGGL
+#define KERNEL_CONFIG  dim3(1024), dim3(1), 0, 0
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(vAdd), KERNEL_CONFIG, Ad);
+
+    // Test: Same thing with templates:
+    int a;
+    float b;
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(myKernel<int, float>),
+                       KERNEL_CONFIG, a, b);
+
+#define TYPE_PARAM_CONFIG int, float
+    hipLaunchKernelGGL(HIP_KERNEL_NAME(myKernel<TYPE_PARAM_CONFIG>),
+                       KERNEL_CONFIG, a, b);
+
+    // Test: Passing hipLaunchKernelGGL inside another macro:
+    float e0;
+    MY_LAUNCH_MACRO(hipLaunchKernelGGL(vAdd, dim3(1024),
+                   dim3(1), 0, 0, Ad), e0, j);
+    MY_LAUNCH_MACRO(WRAP(hipLaunchKernelGGL(vAdd, dim3(1024),
+                   dim3(1), 0, 0, Ad)), e0, j);
+
+#ifdef EXTRA_PARENS_1
+    // Don't wrap hipLaunchKernelGGL in extra set of parens:
+    MY_LAUNCH_MACRO((hipLaunchKernelGGL(vAdd, dim3(1024),
+                    dim3(1), 0, 0, Ad)), e0, j);
+#endif
+
+    MY_LAUNCH(hipLaunchKernelGGL(vAdd, dim3(1024), dim3(1),
+              0, 0, Ad), true, "firstCall");
+    float* A;
+    float e1;
+    MY_LAUNCH_WITH_PAREN(hipMalloc(&A, 100), true, "launch2");
+
+#ifdef EXTRA_PARENS_2
+    // MY_LAUNCH_WITH_PAREN wraps cmd in () which can cause issues.
+    MY_LAUNCH_WITH_PAREN(hipLaunchKernelGGL(vAdd, dim3(1024),
+                         dim3(1), 0, 0, Ad), true, "firstCall");
+#endif
+  }
+  HIP_CHECK(hipHostFree(result_h));
+  HIP_CHECK(hipFree(result_d));
+}
+
+/**
+* End doxygen group KernelTest.
+* @}
+*/
diff --git a/projects/hip-tests/catch/unit/kernel/hipLaunchParmFunctor.cc b/projects/hip-tests/catch/unit/kernel/hipLaunchParmFunctor.cc
index a453dbbde6..679bce5355 100644
--- a/projects/hip-tests/catch/unit/kernel/hipLaunchParmFunctor.cc
+++ b/projects/hip-tests/catch/unit/kernel/hipLaunchParmFunctor.cc
@@ -1,464 +1,464 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
- 
-
-class HipFunctorTests {
- public:
-    // Test that a class functor can be passed to hiplaunchparam
-    // and can be used in kernel
-    void TestForSimpleClassFunctor(void);
-    // Test that a templated class functor can be passed to hiplaunchparam
-    // and can be used in kernel
-    void TestForClassTemplateFunctor(void);
-    // Test that a class functor object ptr  can be passed to hiplaunchparam
-    // and can be used in kernel
-    void TestForClassObjPtrFunctor(void);
-    // Test that a class object containing functor can be passed
-    // to hiplaunchparam and can be used in kernel
-    void TestForFunctorContainInClassObj(void);
-    // Test that a stuct functor can be passed to hiplaunchparam
-    // and can be used in kernel
-    void TestForSimpleStructFunctor(void);
-    // Test that a stuct functor object ptr  can be passed to hiplaunchparam
-    // and can be used in kernel
-    void TestForStructObjPtrFunctor(void);
-    // Test that a templated struct functor can be passed to hiplaunchparam
-    // and can be used in kernel
-    void TestForStructTemplateFunctor(void);
-    // Test that a struct object containing functor can be
-    // passed to hiplaunchparam and can be used in kernel
-    void TestForFunctorContainInStructObj(void);
-};
-
-static const int BLOCK_DIM_SIZE = 1024;
-static const int THREADS_PER_BLOCK = 1;
-
-// class functor tests
-
-// Simple doubler Functor
-class DoublerFunctor{
- public:
-    __device__ int operator()(int x) { return x * 2;}
-};
-
-// simple doubler functor passed to kernel
-__global__ void DoublerFunctorKernel(
-                    DoublerFunctor doubler_,
-                    bool* deviceResult) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  int result = doubler_(5);
-  deviceResult[x] = (result == 10);
-}
-
-void HipFunctorTests::TestForSimpleClassFunctor(void) {
-  DoublerFunctor doubler;
-  bool *deviceResults, *hostResults;
-  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
-    // initialize to false, will be set to
-    // true if the functor is called in device code
-    hostResults[k] = false;
-  }
-
-  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyHostToDevice));
-  hipLaunchKernelGGL(DoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
-                  dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
-
-  // Validation part of TestForSimpleClassFunctor
-  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyDeviceToHost));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
-    REQUIRE(hostResults[k] == true);
-  HIP_CHECK(hipHostFree(hostResults));
-  HIP_CHECK(hipFree(deviceResults));
-}
-
-// pointer functor passed to kernel
-__global__ void PtrDoublerFunctorKernel(
-                    DoublerFunctor *doubler_,
-                    bool* deviceResult) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  int result = (*doubler_)(5);
-  deviceResult[x] = (result == 10);
-}
-
-void HipFunctorTests::TestForClassObjPtrFunctor(void) {
-  DoublerFunctor* ptrdoubler = new DoublerFunctor[sizeof(int)];
-  bool *deviceResults, *hostResults;
-  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
-    // initialize to false, will be set to
-    // true if the functor is called in device code
-    hostResults[k] = false;
-  }
-
-  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyHostToDevice));
-  hipLaunchKernelGGL(PtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
-                  dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
-
-  // Validation part of TestForClassObjPtrFunctor
-  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyDeviceToHost));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
-    REQUIRE(hostResults[k] == true);
-  HIP_CHECK(hipHostFree(hostResults));
-  HIP_CHECK(hipFree(deviceResults));
-  delete[] ptrdoubler;
-}
-
-class compare {
- public:
-    template<typename T1, typename T2>
-    __device__ bool operator()(const T1& v1, const T2& v2) {
-       return v1 > v2;
-    }
-};
-
-// template functor passed to kernel
-__global__ void TemplateFunctorKernel(
-                    compare compare_,
-                    bool* deviceResult) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  deviceResult[x] = compare_(2.2, 2.1);
-  deviceResult[x] = compare_(2, 1);
-  deviceResult[x] = compare_('b', 'a');
-}
-
-void HipFunctorTests::TestForClassTemplateFunctor(void) {
-  compare comparefunctor;
-  bool *deviceResults, *hostResults;
-  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
-    // initialize to false, will be set to
-    // true if the functor is called in device code
-    hostResults[k] = false;
-  }
-
-  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyHostToDevice));
-  hipLaunchKernelGGL(TemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
-                  dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
-
-  // Validation part of TestForClassTemplateFunctor
-  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyDeviceToHost));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
-    REQUIRE(hostResults[k] == true);
-  HIP_CHECK(hipHostFree(hostResults));
-  HIP_CHECK(hipFree(deviceResults));
-}
-
-
-// Doubler calculator
-class DoublerCalculator {
- public:
-    int a, result;
-    // fucntor contained in class object
-    DoublerFunctor doubler;
-};
-
-// doubler functor conatined in class obj passed to kernel
-__global__ void DoublerCalculatorFunctorKernel(
-                    DoublerCalculator doubler_,
-                    bool* deviceResult) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  int result = doubler_.doubler(doubler_.a);
-  deviceResult[x] = (doubler_.result == result);
-}
-
-void HipFunctorTests::TestForFunctorContainInClassObj(void) {
-  DoublerCalculator Doubler;
-  bool *deviceResults, *hostResults;
-  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
-    // initialize to false, will be set to
-    // true if the functor is called in device code
-    hostResults[k] = false;
-  }
-
-  Doubler.a = 5;
-  Doubler.result = 10;
-  // pass comparefunctor to  hipLaunchParm
-
-  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyHostToDevice));
-  hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
-                  dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
-
-  // Validation part of TestForStructTemplateFunctor
-  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyDeviceToHost));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
-    REQUIRE(hostResults[k] == true);
-  HIP_CHECK(hipHostFree(hostResults));
-  HIP_CHECK(hipFree(deviceResults));
-}
-
-// Struct functor tests
-
-// Simple doubler Functor
-struct sDoublerFunctor {
- public:
-    __device__ int operator()(int x) { return x * 2;}
-};
-
-
-// simple sturct doubler functor passed to kernel
-__global__ void structDoublerFunctorKernel(
-                    sDoublerFunctor doubler_,
-                    bool* deviceResult) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  int result = doubler_(5);
-  deviceResult[x] = (result == 10);
-}
-
-void HipFunctorTests::TestForSimpleStructFunctor(void) {
-  sDoublerFunctor doubler;
-  bool *deviceResults, *hostResults;
-  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
-    // initialize to false, will be set to
-    // true if the functor is called in device code
-    hostResults[k] = false;
-  }
-
-  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyHostToDevice));
-  hipLaunchKernelGGL(structDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
-                  dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
-
-  // Validation part of TestForSimpleStructFunctor
-  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyDeviceToHost));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
-    REQUIRE(hostResults[k] == true);
-  HIP_CHECK(hipHostFree(hostResults));
-  HIP_CHECK(hipFree(deviceResults));
-}
-
-// ptr functor passed to kernel
-__global__ void structPtrDoublerFunctorKernel(
-                    sDoublerFunctor *doubler_,
-                    bool* deviceResult) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  int result = (*doubler_)(5);
-  deviceResult[x] = (result == 10);
-}
-
-void HipFunctorTests::TestForStructObjPtrFunctor(void) {
-  sDoublerFunctor* ptrdoubler = new sDoublerFunctor[sizeof(int)];
-  bool *deviceResults, *hostResults;
-  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
-    // initialize to false, will be set to
-    // true if the functor is called in device code
-    hostResults[k] = false;
-  }
-
-  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyHostToDevice));
-  hipLaunchKernelGGL(structPtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
-                  dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
-
-  // Validation part of TestForStructObjPtrFunctor
-  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyDeviceToHost));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
-    REQUIRE(hostResults[k] == true);
-  HIP_CHECK(hipHostFree(hostResults));
-  HIP_CHECK(hipFree(deviceResults));
-  delete[] ptrdoubler;
-}
-
-struct sCompare {
- public:
-    template< typename T1, typename T2 >
-    __device__ bool operator()(const T1& v1, const T2& v2) {
-    return v1 > v2;
-    }
-};
-
-// template functor passed to kernel
-__global__ void structTemplateFunctorKernel(
-                    sCompare compare_,
-                    bool* deviceResult) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  deviceResult[x] = compare_(2.2, 2.1);
-  deviceResult[x] = compare_(2, 1);
-  deviceResult[x] = compare_('b', 'a');
-}
-
-void HipFunctorTests::TestForStructTemplateFunctor(void) {
-  sCompare comparefunctor;
-  bool *deviceResults, *hostResults;
-  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
-    // initialize to false, will be set to
-    // true if the functor is called in device code
-    hostResults[k] = false;
-  }
-
-  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyHostToDevice));
-
-  // pass comparefunctor to  hipLaunchKernelGGL
-  hipLaunchKernelGGL(structTemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
-                  dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
-
-  // Validation part of TestForStructTemplateFunctor
-  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyDeviceToHost));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
-    REQUIRE(hostResults[k] == true);
-  HIP_CHECK(hipHostFree(hostResults));
-  HIP_CHECK(hipFree(deviceResults));
-}
-
-// Doubler calculator struct
-struct sDoublerCalculator {
- public:
-    int a, result;
-    // fucntor contained in class object
-    DoublerFunctor doubler;
-};
-
-
-
-// doubler functor contained in struct passed to kernel
-__global__ void DoublerCalculatorFunctorKernel(
-                    sDoublerCalculator doubler_,
-                    bool* deviceResult) {
-  int x = blockIdx.x * blockDim.x + threadIdx.x;
-  int result = doubler_.doubler(doubler_.a);
-  deviceResult[x] = (doubler_.result == result);
-}
-
-void HipFunctorTests::TestForFunctorContainInStructObj(void) {
-  sDoublerCalculator Doubler;
-  bool *deviceResults, *hostResults;
-  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
-    // initialize to false, will be set to
-    // true if the functor is called in device code
-    hostResults[k] = false;
-  }
-
-  Doubler.a = 5;
-  Doubler.result = 10;
-  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyHostToDevice));
-
-
-  // pass comparefunctor to  hipLaunchKernelGGL
-  hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
-                  dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
-
-  // Validation part of TestForStructTemplateFunctor
-  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
-           hipMemcpyDeviceToHost));
-  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
-    REQUIRE(hostResults[k] == true);
-  HIP_CHECK(hipHostFree(hostResults));
-  HIP_CHECK(hipFree(deviceResults));
-}
-
-/**
-* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
-* @{
-* @ingroup KernelTest
-* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
-   std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
-* Method to invocate kernel functions
-*/
-
-/**
- * Test Description
- * ------------------------
- *    - Test that a class functor can be passed to hiplaunchparam
- * and can be used in kernel.
- *    - Test that a templated class functor can be passed to hiplaunchparam
- * and can be used in kernel.
- *    - Test that a class functor object ptr  can be passed to hiplaunchparam
- * and can be used in kernel.
- *    - Test that a class object containing functor can be passed to hiplaunchparam
- * and can be used in kernel
- *    - Test that a stuct functor can be passed to hiplaunchparam
- * and can be used in kernel
- *    - Test that a stuct functor object ptr  can be passed to hiplaunchparam
- * and can be used in kernel
- *    - Test that a templated struct functor can be passed to hiplaunchparam
- * and can be used in kernel
- *    - Test that a struct object containing functor can be passed to hiplaunchparam
- * and can be used in kernel
-
- * Test source
- * ------------------------
- *    - catch/unit/kernel/hipLaunchParmFunctor.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.5
- */
-
-TEST_CASE("Unit_hipLaunchParmFunctor") {
-  HipFunctorTests FunctorTests;
-
-  SECTION("test for simple class functor") {
-    FunctorTests.TestForSimpleClassFunctor();
-  }
-  SECTION("test for class objptr functor") {
-    FunctorTests.TestForClassObjPtrFunctor();
-  }
-  SECTION("test for class templete functor") {
-    FunctorTests.TestForClassTemplateFunctor();
-  }
-  SECTION("test for simple struct functor") {
-    FunctorTests.TestForSimpleStructFunctor();
-  }
-  SECTION("test for struct objptr functor") {
-    FunctorTests.TestForStructObjPtrFunctor();
-  }
-  SECTION("test for struct templete functor") {
-    FunctorTests.TestForStructTemplateFunctor();
-  }
-  SECTION("test for functor contain in classobj") {
-    FunctorTests.TestForFunctorContainInClassObj();
-  }
-  SECTION("test for functor contain in structobj") {
-    FunctorTests.TestForFunctorContainInStructObj();
-  }
-}
-
-/**
-* End doxygen group KernelTest.
-* @}
-*/
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+ 
+
+class HipFunctorTests {
+ public:
+    // Test that a class functor can be passed to hiplaunchparam
+    // and can be used in kernel
+    void TestForSimpleClassFunctor(void);
+    // Test that a templated class functor can be passed to hiplaunchparam
+    // and can be used in kernel
+    void TestForClassTemplateFunctor(void);
+    // Test that a class functor object ptr  can be passed to hiplaunchparam
+    // and can be used in kernel
+    void TestForClassObjPtrFunctor(void);
+    // Test that a class object containing functor can be passed
+    // to hiplaunchparam and can be used in kernel
+    void TestForFunctorContainInClassObj(void);
+    // Test that a stuct functor can be passed to hiplaunchparam
+    // and can be used in kernel
+    void TestForSimpleStructFunctor(void);
+    // Test that a stuct functor object ptr  can be passed to hiplaunchparam
+    // and can be used in kernel
+    void TestForStructObjPtrFunctor(void);
+    // Test that a templated struct functor can be passed to hiplaunchparam
+    // and can be used in kernel
+    void TestForStructTemplateFunctor(void);
+    // Test that a struct object containing functor can be
+    // passed to hiplaunchparam and can be used in kernel
+    void TestForFunctorContainInStructObj(void);
+};
+
+static const int BLOCK_DIM_SIZE = 1024;
+static const int THREADS_PER_BLOCK = 1;
+
+// class functor tests
+
+// Simple doubler Functor
+class DoublerFunctor{
+ public:
+    __device__ int operator()(int x) { return x * 2;}
+};
+
+// simple doubler functor passed to kernel
+__global__ void DoublerFunctorKernel(
+                    DoublerFunctor doubler_,
+                    bool* deviceResult) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  int result = doubler_(5);
+  deviceResult[x] = (result == 10);
+}
+
+void HipFunctorTests::TestForSimpleClassFunctor(void) {
+  DoublerFunctor doubler;
+  bool *deviceResults, *hostResults;
+  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
+    // initialize to false, will be set to
+    // true if the functor is called in device code
+    hostResults[k] = false;
+  }
+
+  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyHostToDevice));
+  hipLaunchKernelGGL(DoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
+                  dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
+
+  // Validation part of TestForSimpleClassFunctor
+  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyDeviceToHost));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
+    REQUIRE(hostResults[k] == true);
+  HIP_CHECK(hipHostFree(hostResults));
+  HIP_CHECK(hipFree(deviceResults));
+}
+
+// pointer functor passed to kernel
+__global__ void PtrDoublerFunctorKernel(
+                    DoublerFunctor *doubler_,
+                    bool* deviceResult) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  int result = (*doubler_)(5);
+  deviceResult[x] = (result == 10);
+}
+
+void HipFunctorTests::TestForClassObjPtrFunctor(void) {
+  DoublerFunctor* ptrdoubler = new DoublerFunctor[sizeof(int)];
+  bool *deviceResults, *hostResults;
+  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
+    // initialize to false, will be set to
+    // true if the functor is called in device code
+    hostResults[k] = false;
+  }
+
+  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyHostToDevice));
+  hipLaunchKernelGGL(PtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
+                  dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
+
+  // Validation part of TestForClassObjPtrFunctor
+  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyDeviceToHost));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
+    REQUIRE(hostResults[k] == true);
+  HIP_CHECK(hipHostFree(hostResults));
+  HIP_CHECK(hipFree(deviceResults));
+  delete[] ptrdoubler;
+}
+
+class compare {
+ public:
+    template<typename T1, typename T2>
+    __device__ bool operator()(const T1& v1, const T2& v2) {
+       return v1 > v2;
+    }
+};
+
+// template functor passed to kernel
+__global__ void TemplateFunctorKernel(
+                    compare compare_,
+                    bool* deviceResult) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  deviceResult[x] = compare_(2.2, 2.1);
+  deviceResult[x] = compare_(2, 1);
+  deviceResult[x] = compare_('b', 'a');
+}
+
+void HipFunctorTests::TestForClassTemplateFunctor(void) {
+  compare comparefunctor;
+  bool *deviceResults, *hostResults;
+  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
+    // initialize to false, will be set to
+    // true if the functor is called in device code
+    hostResults[k] = false;
+  }
+
+  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyHostToDevice));
+  hipLaunchKernelGGL(TemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
+                  dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
+
+  // Validation part of TestForClassTemplateFunctor
+  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyDeviceToHost));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
+    REQUIRE(hostResults[k] == true);
+  HIP_CHECK(hipHostFree(hostResults));
+  HIP_CHECK(hipFree(deviceResults));
+}
+
+
+// Doubler calculator
+class DoublerCalculator {
+ public:
+    int a, result;
+    // fucntor contained in class object
+    DoublerFunctor doubler;
+};
+
+// doubler functor conatined in class obj passed to kernel
+__global__ void DoublerCalculatorFunctorKernel(
+                    DoublerCalculator doubler_,
+                    bool* deviceResult) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  int result = doubler_.doubler(doubler_.a);
+  deviceResult[x] = (doubler_.result == result);
+}
+
+void HipFunctorTests::TestForFunctorContainInClassObj(void) {
+  DoublerCalculator Doubler;
+  bool *deviceResults, *hostResults;
+  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
+    // initialize to false, will be set to
+    // true if the functor is called in device code
+    hostResults[k] = false;
+  }
+
+  Doubler.a = 5;
+  Doubler.result = 10;
+  // pass comparefunctor to  hipLaunchParm
+
+  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyHostToDevice));
+  hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
+                  dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
+
+  // Validation part of TestForStructTemplateFunctor
+  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyDeviceToHost));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
+    REQUIRE(hostResults[k] == true);
+  HIP_CHECK(hipHostFree(hostResults));
+  HIP_CHECK(hipFree(deviceResults));
+}
+
+// Struct functor tests
+
+// Simple doubler Functor
+struct sDoublerFunctor {
+ public:
+    __device__ int operator()(int x) { return x * 2;}
+};
+
+
+// simple sturct doubler functor passed to kernel
+__global__ void structDoublerFunctorKernel(
+                    sDoublerFunctor doubler_,
+                    bool* deviceResult) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  int result = doubler_(5);
+  deviceResult[x] = (result == 10);
+}
+
+void HipFunctorTests::TestForSimpleStructFunctor(void) {
+  sDoublerFunctor doubler;
+  bool *deviceResults, *hostResults;
+  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
+    // initialize to false, will be set to
+    // true if the functor is called in device code
+    hostResults[k] = false;
+  }
+
+  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyHostToDevice));
+  hipLaunchKernelGGL(structDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
+                  dim3(THREADS_PER_BLOCK), 0, 0, doubler, deviceResults);
+
+  // Validation part of TestForSimpleStructFunctor
+  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyDeviceToHost));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
+    REQUIRE(hostResults[k] == true);
+  HIP_CHECK(hipHostFree(hostResults));
+  HIP_CHECK(hipFree(deviceResults));
+}
+
+// ptr functor passed to kernel
+__global__ void structPtrDoublerFunctorKernel(
+                    sDoublerFunctor *doubler_,
+                    bool* deviceResult) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  int result = (*doubler_)(5);
+  deviceResult[x] = (result == 10);
+}
+
+void HipFunctorTests::TestForStructObjPtrFunctor(void) {
+  sDoublerFunctor* ptrdoubler = new sDoublerFunctor[sizeof(int)];
+  bool *deviceResults, *hostResults;
+  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
+    // initialize to false, will be set to
+    // true if the functor is called in device code
+    hostResults[k] = false;
+  }
+
+  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyHostToDevice));
+  hipLaunchKernelGGL(structPtrDoublerFunctorKernel, dim3(BLOCK_DIM_SIZE),
+                  dim3(THREADS_PER_BLOCK), 0, 0, ptrdoubler, deviceResults);
+
+  // Validation part of TestForStructObjPtrFunctor
+  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyDeviceToHost));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
+    REQUIRE(hostResults[k] == true);
+  HIP_CHECK(hipHostFree(hostResults));
+  HIP_CHECK(hipFree(deviceResults));
+  delete[] ptrdoubler;
+}
+
+struct sCompare {
+ public:
+    template< typename T1, typename T2 >
+    __device__ bool operator()(const T1& v1, const T2& v2) {
+    return v1 > v2;
+    }
+};
+
+// template functor passed to kernel
+__global__ void structTemplateFunctorKernel(
+                    sCompare compare_,
+                    bool* deviceResult) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  deviceResult[x] = compare_(2.2, 2.1);
+  deviceResult[x] = compare_(2, 1);
+  deviceResult[x] = compare_('b', 'a');
+}
+
+void HipFunctorTests::TestForStructTemplateFunctor(void) {
+  sCompare comparefunctor;
+  bool *deviceResults, *hostResults;
+  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
+    // initialize to false, will be set to
+    // true if the functor is called in device code
+    hostResults[k] = false;
+  }
+
+  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyHostToDevice));
+
+  // pass comparefunctor to  hipLaunchKernelGGL
+  hipLaunchKernelGGL(structTemplateFunctorKernel, dim3(BLOCK_DIM_SIZE),
+                  dim3(THREADS_PER_BLOCK), 0, 0, comparefunctor, deviceResults);
+
+  // Validation part of TestForStructTemplateFunctor
+  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyDeviceToHost));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
+    REQUIRE(hostResults[k] == true);
+  HIP_CHECK(hipHostFree(hostResults));
+  HIP_CHECK(hipFree(deviceResults));
+}
+
+// Doubler calculator struct
+struct sDoublerCalculator {
+ public:
+    int a, result;
+    // fucntor contained in class object
+    DoublerFunctor doubler;
+};
+
+
+
+// doubler functor contained in struct passed to kernel
+__global__ void DoublerCalculatorFunctorKernel(
+                    sDoublerCalculator doubler_,
+                    bool* deviceResult) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  int result = doubler_.doubler(doubler_.a);
+  deviceResult[x] = (doubler_.result == result);
+}
+
+void HipFunctorTests::TestForFunctorContainInStructObj(void) {
+  sDoublerCalculator Doubler;
+  bool *deviceResults, *hostResults;
+  HIP_CHECK(hipMalloc(&deviceResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  HIP_CHECK(hipHostMalloc(&hostResults, BLOCK_DIM_SIZE*sizeof(bool)));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k) {
+    // initialize to false, will be set to
+    // true if the functor is called in device code
+    hostResults[k] = false;
+  }
+
+  Doubler.a = 5;
+  Doubler.result = 10;
+  HIP_CHECK(hipMemcpy(deviceResults, hostResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyHostToDevice));
+
+
+  // pass comparefunctor to  hipLaunchKernelGGL
+  hipLaunchKernelGGL(DoublerCalculatorFunctorKernel, dim3(BLOCK_DIM_SIZE),
+                  dim3(THREADS_PER_BLOCK), 0, 0, Doubler, deviceResults);
+
+  // Validation part of TestForStructTemplateFunctor
+  HIP_CHECK(hipMemcpy(hostResults, deviceResults, BLOCK_DIM_SIZE*sizeof(bool),
+           hipMemcpyDeviceToHost));
+  for (int k = 0; k < BLOCK_DIM_SIZE; ++k)
+    REQUIRE(hostResults[k] == true);
+  HIP_CHECK(hipHostFree(hostResults));
+  HIP_CHECK(hipFree(deviceResults));
+}
+
+/**
+* @addtogroup hipLaunchKernelGGL hipLaunchKernelGGL
+* @{
+* @ingroup KernelTest
+* `void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
+   std::uint32_t sharedMemBytes, hipStream_t stream, Args... args)` -
+* Method to invocate kernel functions
+*/
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Test that a class functor can be passed to hiplaunchparam
+ * and can be used in kernel.
+ *    - Test that a templated class functor can be passed to hiplaunchparam
+ * and can be used in kernel.
+ *    - Test that a class functor object ptr  can be passed to hiplaunchparam
+ * and can be used in kernel.
+ *    - Test that a class object containing functor can be passed to hiplaunchparam
+ * and can be used in kernel
+ *    - Test that a stuct functor can be passed to hiplaunchparam
+ * and can be used in kernel
+ *    - Test that a stuct functor object ptr  can be passed to hiplaunchparam
+ * and can be used in kernel
+ *    - Test that a templated struct functor can be passed to hiplaunchparam
+ * and can be used in kernel
+ *    - Test that a struct object containing functor can be passed to hiplaunchparam
+ * and can be used in kernel
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/kernel/hipLaunchParmFunctor.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.5
+ */
+
+TEST_CASE("Unit_hipLaunchParmFunctor") {
+  HipFunctorTests FunctorTests;
+
+  SECTION("test for simple class functor") {
+    FunctorTests.TestForSimpleClassFunctor();
+  }
+  SECTION("test for class objptr functor") {
+    FunctorTests.TestForClassObjPtrFunctor();
+  }
+  SECTION("test for class templete functor") {
+    FunctorTests.TestForClassTemplateFunctor();
+  }
+  SECTION("test for simple struct functor") {
+    FunctorTests.TestForSimpleStructFunctor();
+  }
+  SECTION("test for struct objptr functor") {
+    FunctorTests.TestForStructObjPtrFunctor();
+  }
+  SECTION("test for struct templete functor") {
+    FunctorTests.TestForStructTemplateFunctor();
+  }
+  SECTION("test for functor contain in classobj") {
+    FunctorTests.TestForFunctorContainInClassObj();
+  }
+  SECTION("test for functor contain in structobj") {
+    FunctorTests.TestForFunctorContainInStructObj();
+  }
+}
+
+/**
+* End doxygen group KernelTest.
+* @}
+*/
diff --git a/projects/hip-tests/catch/unit/memory/hipSVMTestSharedAddressSpaceFineGrain.cpp b/projects/hip-tests/catch/unit/memory/hipSVMTestSharedAddressSpaceFineGrain.cpp
index e3144dd596..c4d9fd3854 100644
--- a/projects/hip-tests/catch/unit/memory/hipSVMTestSharedAddressSpaceFineGrain.cpp
+++ b/projects/hip-tests/catch/unit/memory/hipSVMTestSharedAddressSpaceFineGrain.cpp
@@ -119,7 +119,7 @@ void verify_linked_lists_on_device(hipStream_t stream, Node* pNodes,
                                           unsigned int* pNumCorrect, unsigned int numLists,
                                           unsigned int ListLength) {
   *pNumCorrect = 0;     // reset numCorrect to zero
- 
+
   verify_linked_lists_on_device<<<(numLists + 255) / 256, 256, 0, stream>>>(pNodes, pNumCorrect,
                                                                      ListLength);
 
diff --git a/projects/hip-tests/catch/unit/p2p/CMakeLists.txt b/projects/hip-tests/catch/unit/p2p/CMakeLists.txt
index d24910daf2..7170294cb8 100644
--- a/projects/hip-tests/catch/unit/p2p/CMakeLists.txt
+++ b/projects/hip-tests/catch/unit/p2p/CMakeLists.txt
@@ -1,24 +1,24 @@
-# Common Tests - Test independent of all platforms
-# moved hipDeviceGetP2PAttribute.cc from /catch/unit/device to
-# /catch/unit/p2p folder and its dependent files.
-set(TEST_SRC
-    hipDeviceGetP2PAttribute.cc
-)
-
-# only for AMD
-if(HIP_PLATFORM MATCHES "amd")
-  set(AMD_SRC
-    hipP2pLinkTypeAndHopFunc.cc
-  )
-  set(TEST_SRC ${TEST_SRC} ${AMD_SRC})
-endif()
-
-set_source_files_properties(hipDeviceGetP2PAttribute.cc PROPERTIES COMPILE_FLAGS -std=c++17)
-
-add_executable(hipDeviceGetP2PAttribute_exe EXCLUDE_FROM_ALL hipDeviceGetP2PAttribute_exe.cc)
-
-hip_add_exe_to_target(NAME p2pTests
-                      TEST_SRC ${TEST_SRC}
-                      TEST_TARGET_NAME build_tests)
-
-add_dependencies(build_tests hipDeviceGetP2PAttribute_exe)
+# Common Tests - Test independent of all platforms
+# moved hipDeviceGetP2PAttribute.cc from /catch/unit/device to
+# /catch/unit/p2p folder and its dependent files.
+set(TEST_SRC
+    hipDeviceGetP2PAttribute.cc
+)
+
+# only for AMD
+if(HIP_PLATFORM MATCHES "amd")
+  set(AMD_SRC
+    hipP2pLinkTypeAndHopFunc.cc
+  )
+  set(TEST_SRC ${TEST_SRC} ${AMD_SRC})
+endif()
+
+set_source_files_properties(hipDeviceGetP2PAttribute.cc PROPERTIES COMPILE_FLAGS -std=c++17)
+
+add_executable(hipDeviceGetP2PAttribute_exe EXCLUDE_FROM_ALL hipDeviceGetP2PAttribute_exe.cc)
+
+hip_add_exe_to_target(NAME p2pTests
+                      TEST_SRC ${TEST_SRC}
+                      TEST_TARGET_NAME build_tests)
+
+add_dependencies(build_tests hipDeviceGetP2PAttribute_exe)
diff --git a/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc b/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc
index 71e0c6f0c7..7624cf0507 100644
--- a/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc
+++ b/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc
@@ -1,356 +1,356 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include "hipP2pLinkTypeAndHopFunc.h"
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
- 
-#ifdef __linux__
-#include <unistd.h>
-#include <sys/wait.h>
-#include <dlfcn.h>
-#endif
-#include <vector>
-#define MAX_SIZE 30
-#define VISIBLE_DEVICE 0
-
-/**
- * Fetches Gpu device count
- */
-#ifdef __linux__
-void getDeviceCount(int *pdevCnt) {
-  int fd[2], val = 0;
-  pid_t childpid;
-  // create pipe descriptors
-  pipe(fd);
-  // disable visible_devices env from shell
-  unsetenv("ROCR_VISIBLE_DEVICES");
-  unsetenv("HIP_VISIBLE_DEVICES");
-
-  childpid = fork();
-  if (childpid > 0) {  // Parent
-    close(fd[1]);
-    // parent will wait to read the device cnt
-    read(fd[0], &val, sizeof(val));
-    // close the read-descriptor
-    close(fd[0]);
-    // wait for child exit
-    wait(NULL);
-    *pdevCnt = val;
-  } else if (!childpid) {  // Child
-    int devCnt = 1;
-    // writing only, no need for read-descriptor
-    close(fd[0]);
-    HIP_CHECK(hipGetDeviceCount(&devCnt));
-    // send the value on the write-descriptor:
-    write(fd[1], &devCnt, sizeof(devCnt));
-    // close the write descriptor:
-    close(fd[1]);
-    exit(0);
-  } else {  // failure
-    *pdevCnt = 1;
-    return;
-  }
-}
-
-bool testMaskedDevice(int actualNumGPUs) {
-  bool testResult = true;
-  int fd[2];
-  pipe(fd);
-
-  pid_t cPid;
-  cPid = fork();
-  if (cPid == 0) {  // child
-    hipError_t err;
-    char visibleDeviceString[MAX_SIZE] = {};
-    snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
-    // disable visible_devices env from shell
-    unsetenv("ROCR_VISIBLE_DEVICES");
-    unsetenv("HIP_VISIBLE_DEVICES");
-    setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
-    setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
-    uint32_t linktype;
-    uint32_t hopcount;
-    for (int count = 1;
-        count < actualNumGPUs; count++) {
-      err = hipExtGetLinkTypeAndHopCount(VISIBLE_DEVICE,
-            VISIBLE_DEVICE+count, &linktype, &hopcount);
-      REQUIRE(err == hipSuccess);
-    }
-    close(fd[0]);
-    write(fd[1], &testResult, sizeof(testResult));
-    close(fd[1]);
-    exit(0);
-
-  } else if (cPid > 0) {  // parent
-    close(fd[1]);
-    read(fd[0], &testResult, sizeof(testResult));
-    close(fd[0]);
-    wait(NULL);
-
-  } else {
-    printf("Info:fork() failed\n");
-    testResult = false;
-  }
-  return testResult;
-}
-#endif
-
-bool testhipInvalidDevice(int numDevices) {
-  hipError_t ret;
-  uint32_t linktype;
-  uint32_t hopcount;
-  SECTION("Invalid device number case 1") {
-    ret = hipExtGetLinkTypeAndHopCount(-1, 0, &linktype, &hopcount);
-    REQUIRE(ret != hipSuccess);
-  }
-  SECTION("Invalid device number case 2") {
-    ret = hipExtGetLinkTypeAndHopCount(numDevices, 0, &linktype, &hopcount);
-    REQUIRE(ret != hipSuccess);
-  }
-  SECTION("Invalid device number case 3") {
-    ret = hipExtGetLinkTypeAndHopCount(0, -1, &linktype, &hopcount);
-    REQUIRE(ret != hipSuccess);
-  }
-  SECTION("Invalid device number case 4") {
-    ret = hipExtGetLinkTypeAndHopCount(0, numDevices, &linktype, &hopcount);
-    REQUIRE(ret != hipSuccess);
-  }
-  SECTION("Invalid device number case 5") {
-    ret = hipExtGetLinkTypeAndHopCount(-1, numDevices, &linktype, &hopcount);
-    REQUIRE(ret != hipSuccess);
-  }
-  return true;
-}
-
-#ifdef __linux__
-bool testhipInvalidLinkType() {
-  uint32_t hopcount;
-  REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, nullptr,
-                                                     &hopcount));
-  return true;
-}
-
-bool testhipInvalidHopcount() {
-  uint32_t linktype;
-  REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, &linktype, nullptr));
-  return true;
-}
-
-bool testhipSameDevice(int numGPUs) {
-  hipError_t ret;
-  uint32_t linktype = 0;
-  uint32_t hopcount = 0;
-  for (int gpuId = 0; gpuId < numGPUs; gpuId++) {
-    ret = hipExtGetLinkTypeAndHopCount(gpuId, gpuId, &linktype, &hopcount);
-    REQUIRE(ret != hipSuccess);
-  }
-  return true;
-}
-
-bool testhipLinkTypeHopcountDeviceOrderRev(int numDevices) {
-  bool TestPassed = true;
-  // Get the unique pair of devices
-  for (int x = 0; x < numDevices; x++) {
-    for (int y = x+1; y < numDevices; y++) {
-      uint32_t linktype1 = 0, linktype2 = 0;
-      uint32_t hopcount1 = 0, hopcount2 = 0;
-      HIP_CHECK(hipExtGetLinkTypeAndHopCount(x, y,
-                          &linktype1, &hopcount1));
-      HIP_CHECK(hipExtGetLinkTypeAndHopCount(y, x,
-                          &linktype2, &hopcount2));
-      if (hopcount1 != hopcount2) {
-        TestPassed = false;
-        break;
-      }
-    }
-  }
-  return TestPassed;
-}
-
-/**
- * Internal Function
- */
-bool validateLinkType(uint32_t linktype_Hip,
-                      RSMI_IO_LINK_TYPE linktype_RocmSmi) {
-  bool TestPassed = false;
-
-  if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_PCIE) &&
-     (linktype_RocmSmi == RSMI_IOLINK_TYPE_PCIEXPRESS)) {
-    TestPassed = true;
-  } else if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_XGMI) &&
-     (linktype_RocmSmi == RSMI_IOLINK_TYPE_XGMI)) {
-    TestPassed = true;
-  } else {
-    printf("linktype Hip = %u, linktype RocmSmi = %u\n",
-            linktype_Hip, linktype_RocmSmi);
-    TestPassed = false;
-  }
-  return TestPassed;
-}
-
-bool testhipLinkTypeHopcountDevice(int numDevices) {
-  bool TestPassed = true;
-  // Opening and initializing rocm-smi library
-  void *lib_rocm_smi_hdl;
-  rsmi_status_t (*fntopo_get_link_type)(uint32_t, uint32_t, uint64_t*,
-                      RSMI_IO_LINK_TYPE*);
-  rsmi_status_t (*fntopo_init)(uint64_t);
-  rsmi_status_t (*fntopo_shut_down)();
-
-  lib_rocm_smi_hdl = dlopen("/opt/rocm/lib/librocm_smi64.so",
-                        RTLD_LAZY);
-  REQUIRE(lib_rocm_smi_hdl);
-
-  void* fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_topo_get_link_type");
-  REQUIRE(fnsym);
-
-  fntopo_get_link_type = reinterpret_cast<rsmi_status_t (*)(uint32_t,
-            uint32_t, uint64_t*, RSMI_IO_LINK_TYPE*)>(fnsym);
-
-  fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_init");
-  REQUIRE(fnsym);
-  fntopo_init = reinterpret_cast<rsmi_status_t (*)(uint64_t)>(fnsym);
-
-  fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_shut_down");
-  REQUIRE(fnsym);
-  fntopo_shut_down = reinterpret_cast<rsmi_status_t (*)()>(fnsym);
-
-  uint64_t init_flags = 0;
-  rsmi_status_t retsmi_init;
-  retsmi_init = fntopo_init(init_flags);
-  REQUIRE(RSMI_STATUS_SUCCESS == retsmi_init);
-
-  // Use rocm-smi API rsmi_topo_get_link_type() to validate
-  struct devicePair {
-    int device1;
-    int device2;
-  };
-  std::vector<struct devicePair> devicePairList;
-  // Get the unique pair of devices
-  for (int x = 0; x < numDevices; x++) {
-    for (int y = x+1; y < numDevices; y++) {
-      devicePairList.push_back({x, y});
-    }
-  }
-  for (auto pos=devicePairList.begin();
-       pos != devicePairList.end(); pos++) {
-    uint32_t linktype1 = 0;
-    uint32_t hopcount1 = 0;
-    RSMI_IO_LINK_TYPE linktype2 = RSMI_IOLINK_TYPE_UNDEFINED;
-    uint64_t hopcount2 = 0;
-    rsmi_status_t retsmi;
-    HIPCHECK(hipExtGetLinkTypeAndHopCount((*pos).device1,
-                (*pos).device2, &linktype1, &hopcount1));
-    retsmi = fntopo_get_link_type((*pos).device1,
-                (*pos).device2, &hopcount2, &linktype2);
-    REQUIRE(RSMI_STATUS_SUCCESS == retsmi);
-
-    // Validate linktype
-    TestPassed = validateLinkType(linktype1, linktype2);
-  }
-  fntopo_shut_down();
-  dlclose(lib_rocm_smi_hdl);
-  return TestPassed;
-}
-#endif
-
-/**
- * @addtogroup hipExtGetLinkTypeAndHopCount hipExtGetLinkTypeAndHopCount
- * @{
- * @ingroup p2pTest
- * `hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount)` -
- * Returns the link type and hop count between two devices
- * @}
- */
-
-/**
- * Test Description
- * ------------------------
- *    - Validates negative scenarios for hipExtGetLinkTypeAndHopCount
- * 1)Test Scenario to verify when device1 is visible and device2 is masked
- * 2)Test Scenario to verify Invalid Device Number(s)
- * 3)Test Scenario to verify when linktype = NULL
- * 4)Test Scenario to verify when hopcount = NULL
- * 5)Test Scenario to verify when device1 = device2
- * 6)Test Scenario: Verify (hopcount, linktype) values for (src= device1, dest = device2)
- * and (src = device2, dest = device1), where device1 and device2 are valid device numbers.
- * 7)Test Scenario: Verify (hopcount, linktype) values for all combination of
- * GPUs with the output of rocm_smi tool.
-
- * Test source
- * ------------------------
- *    - catch/unit/p2p/hipExtGetLinkTypeAndHopCount.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.5
- */
-
-TEST_CASE("Unit_hipP2pLinkTypeAndHopFunc") {
-  int numDevices = 0;
-  bool TestPassed = true;
-  HIP_CHECK(hipGetDeviceCount(&numDevices));
-  if (numDevices < 2) {
-    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
-    return;
-  }
-  SECTION("Test running for testhipInvalidDevice") {
-    TestPassed = testhipInvalidDevice(numDevices);
-    REQUIRE(TestPassed == true);
-  }
-#ifdef __linux__
-  getDeviceCount(&numDevices);
-  if (numDevices < 2) {
-    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
-    return;
-  }
-  SECTION("Test running for testMaskedDevice") {
-    TestPassed = testMaskedDevice(numDevices);
-    REQUIRE(TestPassed == true);
-  }
-  SECTION("Test running for testhipInvalidLinkType") {
-    TestPassed = testhipInvalidLinkType();
-    REQUIRE(TestPassed == true);
-  }
-  SECTION("Test running for testhipInvalidHopcount") {
-    TestPassed = testhipInvalidHopcount();
-    REQUIRE(TestPassed == true);
-  }
-  SECTION("Test running for testhipSameDevice") {
-    TestPassed = testhipSameDevice(numDevices);
-    REQUIRE(TestPassed == true);
-  }
-  SECTION("Test running for testhipLinkTypeHopcountDeviceOrderRev") {
-    TestPassed = testhipLinkTypeHopcountDeviceOrderRev(numDevices);
-    REQUIRE(TestPassed == true);
-  }
-  SECTION("Test running for testhipLinkTypeHopcountDevice") {
-    TestPassed = testhipLinkTypeHopcountDevice(numDevices);
-    REQUIRE(TestPassed == true);
-  }
-#else
-    printf("This test is skipped due to non linux environment.\n");
-#endif
-}
-
-/**
-* End doxygen group p2pTest.
-* @}
-*/
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "hipP2pLinkTypeAndHopFunc.h"
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+ 
+#ifdef __linux__
+#include <unistd.h>
+#include <sys/wait.h>
+#include <dlfcn.h>
+#endif
+#include <vector>
+#define MAX_SIZE 30
+#define VISIBLE_DEVICE 0
+
+/**
+ * Fetches Gpu device count
+ */
+#ifdef __linux__
+void getDeviceCount(int *pdevCnt) {
+  int fd[2], val = 0;
+  pid_t childpid;
+  // create pipe descriptors
+  pipe(fd);
+  // disable visible_devices env from shell
+  unsetenv("ROCR_VISIBLE_DEVICES");
+  unsetenv("HIP_VISIBLE_DEVICES");
+
+  childpid = fork();
+  if (childpid > 0) {  // Parent
+    close(fd[1]);
+    // parent will wait to read the device cnt
+    read(fd[0], &val, sizeof(val));
+    // close the read-descriptor
+    close(fd[0]);
+    // wait for child exit
+    wait(NULL);
+    *pdevCnt = val;
+  } else if (!childpid) {  // Child
+    int devCnt = 1;
+    // writing only, no need for read-descriptor
+    close(fd[0]);
+    HIP_CHECK(hipGetDeviceCount(&devCnt));
+    // send the value on the write-descriptor:
+    write(fd[1], &devCnt, sizeof(devCnt));
+    // close the write descriptor:
+    close(fd[1]);
+    exit(0);
+  } else {  // failure
+    *pdevCnt = 1;
+    return;
+  }
+}
+
+bool testMaskedDevice(int actualNumGPUs) {
+  bool testResult = true;
+  int fd[2];
+  pipe(fd);
+
+  pid_t cPid;
+  cPid = fork();
+  if (cPid == 0) {  // child
+    hipError_t err;
+    char visibleDeviceString[MAX_SIZE] = {};
+    snprintf(visibleDeviceString, MAX_SIZE, "%d", VISIBLE_DEVICE);
+    // disable visible_devices env from shell
+    unsetenv("ROCR_VISIBLE_DEVICES");
+    unsetenv("HIP_VISIBLE_DEVICES");
+    setenv("ROCR_VISIBLE_DEVICES", visibleDeviceString, 1);
+    setenv("HIP_VISIBLE_DEVICES", visibleDeviceString, 1);
+    uint32_t linktype;
+    uint32_t hopcount;
+    for (int count = 1;
+        count < actualNumGPUs; count++) {
+      err = hipExtGetLinkTypeAndHopCount(VISIBLE_DEVICE,
+            VISIBLE_DEVICE+count, &linktype, &hopcount);
+      REQUIRE(err == hipSuccess);
+    }
+    close(fd[0]);
+    write(fd[1], &testResult, sizeof(testResult));
+    close(fd[1]);
+    exit(0);
+
+  } else if (cPid > 0) {  // parent
+    close(fd[1]);
+    read(fd[0], &testResult, sizeof(testResult));
+    close(fd[0]);
+    wait(NULL);
+
+  } else {
+    printf("Info:fork() failed\n");
+    testResult = false;
+  }
+  return testResult;
+}
+#endif
+
+bool testhipInvalidDevice(int numDevices) {
+  hipError_t ret;
+  uint32_t linktype;
+  uint32_t hopcount;
+  SECTION("Invalid device number case 1") {
+    ret = hipExtGetLinkTypeAndHopCount(-1, 0, &linktype, &hopcount);
+    REQUIRE(ret != hipSuccess);
+  }
+  SECTION("Invalid device number case 2") {
+    ret = hipExtGetLinkTypeAndHopCount(numDevices, 0, &linktype, &hopcount);
+    REQUIRE(ret != hipSuccess);
+  }
+  SECTION("Invalid device number case 3") {
+    ret = hipExtGetLinkTypeAndHopCount(0, -1, &linktype, &hopcount);
+    REQUIRE(ret != hipSuccess);
+  }
+  SECTION("Invalid device number case 4") {
+    ret = hipExtGetLinkTypeAndHopCount(0, numDevices, &linktype, &hopcount);
+    REQUIRE(ret != hipSuccess);
+  }
+  SECTION("Invalid device number case 5") {
+    ret = hipExtGetLinkTypeAndHopCount(-1, numDevices, &linktype, &hopcount);
+    REQUIRE(ret != hipSuccess);
+  }
+  return true;
+}
+
+#ifdef __linux__
+bool testhipInvalidLinkType() {
+  uint32_t hopcount;
+  REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, nullptr,
+                                                     &hopcount));
+  return true;
+}
+
+bool testhipInvalidHopcount() {
+  uint32_t linktype;
+  REQUIRE(hipSuccess != hipExtGetLinkTypeAndHopCount(0, 1, &linktype, nullptr));
+  return true;
+}
+
+bool testhipSameDevice(int numGPUs) {
+  hipError_t ret;
+  uint32_t linktype = 0;
+  uint32_t hopcount = 0;
+  for (int gpuId = 0; gpuId < numGPUs; gpuId++) {
+    ret = hipExtGetLinkTypeAndHopCount(gpuId, gpuId, &linktype, &hopcount);
+    REQUIRE(ret != hipSuccess);
+  }
+  return true;
+}
+
+bool testhipLinkTypeHopcountDeviceOrderRev(int numDevices) {
+  bool TestPassed = true;
+  // Get the unique pair of devices
+  for (int x = 0; x < numDevices; x++) {
+    for (int y = x+1; y < numDevices; y++) {
+      uint32_t linktype1 = 0, linktype2 = 0;
+      uint32_t hopcount1 = 0, hopcount2 = 0;
+      HIP_CHECK(hipExtGetLinkTypeAndHopCount(x, y,
+                          &linktype1, &hopcount1));
+      HIP_CHECK(hipExtGetLinkTypeAndHopCount(y, x,
+                          &linktype2, &hopcount2));
+      if (hopcount1 != hopcount2) {
+        TestPassed = false;
+        break;
+      }
+    }
+  }
+  return TestPassed;
+}
+
+/**
+ * Internal Function
+ */
+bool validateLinkType(uint32_t linktype_Hip,
+                      RSMI_IO_LINK_TYPE linktype_RocmSmi) {
+  bool TestPassed = false;
+
+  if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_PCIE) &&
+     (linktype_RocmSmi == RSMI_IOLINK_TYPE_PCIEXPRESS)) {
+    TestPassed = true;
+  } else if ((linktype_Hip == HSA_AMD_LINK_INFO_TYPE_XGMI) &&
+     (linktype_RocmSmi == RSMI_IOLINK_TYPE_XGMI)) {
+    TestPassed = true;
+  } else {
+    printf("linktype Hip = %u, linktype RocmSmi = %u\n",
+            linktype_Hip, linktype_RocmSmi);
+    TestPassed = false;
+  }
+  return TestPassed;
+}
+
+bool testhipLinkTypeHopcountDevice(int numDevices) {
+  bool TestPassed = true;
+  // Opening and initializing rocm-smi library
+  void *lib_rocm_smi_hdl;
+  rsmi_status_t (*fntopo_get_link_type)(uint32_t, uint32_t, uint64_t*,
+                      RSMI_IO_LINK_TYPE*);
+  rsmi_status_t (*fntopo_init)(uint64_t);
+  rsmi_status_t (*fntopo_shut_down)();
+
+  lib_rocm_smi_hdl = dlopen("/opt/rocm/lib/librocm_smi64.so",
+                        RTLD_LAZY);
+  REQUIRE(lib_rocm_smi_hdl);
+
+  void* fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_topo_get_link_type");
+  REQUIRE(fnsym);
+
+  fntopo_get_link_type = reinterpret_cast<rsmi_status_t (*)(uint32_t,
+            uint32_t, uint64_t*, RSMI_IO_LINK_TYPE*)>(fnsym);
+
+  fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_init");
+  REQUIRE(fnsym);
+  fntopo_init = reinterpret_cast<rsmi_status_t (*)(uint64_t)>(fnsym);
+
+  fnsym = dlsym(lib_rocm_smi_hdl, "rsmi_shut_down");
+  REQUIRE(fnsym);
+  fntopo_shut_down = reinterpret_cast<rsmi_status_t (*)()>(fnsym);
+
+  uint64_t init_flags = 0;
+  rsmi_status_t retsmi_init;
+  retsmi_init = fntopo_init(init_flags);
+  REQUIRE(RSMI_STATUS_SUCCESS == retsmi_init);
+
+  // Use rocm-smi API rsmi_topo_get_link_type() to validate
+  struct devicePair {
+    int device1;
+    int device2;
+  };
+  std::vector<struct devicePair> devicePairList;
+  // Get the unique pair of devices
+  for (int x = 0; x < numDevices; x++) {
+    for (int y = x+1; y < numDevices; y++) {
+      devicePairList.push_back({x, y});
+    }
+  }
+  for (auto pos=devicePairList.begin();
+       pos != devicePairList.end(); pos++) {
+    uint32_t linktype1 = 0;
+    uint32_t hopcount1 = 0;
+    RSMI_IO_LINK_TYPE linktype2 = RSMI_IOLINK_TYPE_UNDEFINED;
+    uint64_t hopcount2 = 0;
+    rsmi_status_t retsmi;
+    HIPCHECK(hipExtGetLinkTypeAndHopCount((*pos).device1,
+                (*pos).device2, &linktype1, &hopcount1));
+    retsmi = fntopo_get_link_type((*pos).device1,
+                (*pos).device2, &hopcount2, &linktype2);
+    REQUIRE(RSMI_STATUS_SUCCESS == retsmi);
+
+    // Validate linktype
+    TestPassed = validateLinkType(linktype1, linktype2);
+  }
+  fntopo_shut_down();
+  dlclose(lib_rocm_smi_hdl);
+  return TestPassed;
+}
+#endif
+
+/**
+ * @addtogroup hipExtGetLinkTypeAndHopCount hipExtGetLinkTypeAndHopCount
+ * @{
+ * @ingroup p2pTest
+ * `hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount)` -
+ * Returns the link type and hop count between two devices
+ * @}
+ */
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Validates negative scenarios for hipExtGetLinkTypeAndHopCount
+ * 1)Test Scenario to verify when device1 is visible and device2 is masked
+ * 2)Test Scenario to verify Invalid Device Number(s)
+ * 3)Test Scenario to verify when linktype = NULL
+ * 4)Test Scenario to verify when hopcount = NULL
+ * 5)Test Scenario to verify when device1 = device2
+ * 6)Test Scenario: Verify (hopcount, linktype) values for (src= device1, dest = device2)
+ * and (src = device2, dest = device1), where device1 and device2 are valid device numbers.
+ * 7)Test Scenario: Verify (hopcount, linktype) values for all combination of
+ * GPUs with the output of rocm_smi tool.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/p2p/hipExtGetLinkTypeAndHopCount.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.5
+ */
+
+TEST_CASE("Unit_hipP2pLinkTypeAndHopFunc") {
+  int numDevices = 0;
+  bool TestPassed = true;
+  HIP_CHECK(hipGetDeviceCount(&numDevices));
+  if (numDevices < 2) {
+    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
+    return;
+  }
+  SECTION("Test running for testhipInvalidDevice") {
+    TestPassed = testhipInvalidDevice(numDevices);
+    REQUIRE(TestPassed == true);
+  }
+#ifdef __linux__
+  getDeviceCount(&numDevices);
+  if (numDevices < 2) {
+    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
+    return;
+  }
+  SECTION("Test running for testMaskedDevice") {
+    TestPassed = testMaskedDevice(numDevices);
+    REQUIRE(TestPassed == true);
+  }
+  SECTION("Test running for testhipInvalidLinkType") {
+    TestPassed = testhipInvalidLinkType();
+    REQUIRE(TestPassed == true);
+  }
+  SECTION("Test running for testhipInvalidHopcount") {
+    TestPassed = testhipInvalidHopcount();
+    REQUIRE(TestPassed == true);
+  }
+  SECTION("Test running for testhipSameDevice") {
+    TestPassed = testhipSameDevice(numDevices);
+    REQUIRE(TestPassed == true);
+  }
+  SECTION("Test running for testhipLinkTypeHopcountDeviceOrderRev") {
+    TestPassed = testhipLinkTypeHopcountDeviceOrderRev(numDevices);
+    REQUIRE(TestPassed == true);
+  }
+  SECTION("Test running for testhipLinkTypeHopcountDevice") {
+    TestPassed = testhipLinkTypeHopcountDevice(numDevices);
+    REQUIRE(TestPassed == true);
+  }
+#else
+    printf("This test is skipped due to non linux environment.\n");
+#endif
+}
+
+/**
+* End doxygen group p2pTest.
+* @}
+*/
diff --git a/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.h b/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.h
index 755743c328..0e791b66fd 100644
--- a/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.h
+++ b/projects/hip-tests/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.h
@@ -1,110 +1,110 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#ifndef _HIP_DIRTEST_P2PLINKTYPEHOP_H_
-#define _HIP_DIRTEST_P2PLINKTYPEHOP_H_
-/**
- * rocm_smi.h enums
- */
-typedef enum {
-  RSMI_STATUS_SUCCESS = 0x0,             //!< Operation was successful
-  RSMI_STATUS_INVALID_ARGS,              //!< Passed in arguments are not valid
-  RSMI_STATUS_NOT_SUPPORTED,             //!< The requested information or
-                                         //!< action is not available for the
-                                         //!< given input, on the given system
-  RSMI_STATUS_FILE_ERROR,                //!< Problem accessing a file. This
-                                         //!< may because the operation is not
-                                         //!< supported by the Linux kernel
-                                         //!< version running on the executing
-                                         //!< machine
-  RSMI_STATUS_PERMISSION,                //!< Permission denied/EACCESS file
-                                         //!< error. Many functions require
-                                         //!< root access to run.
-  RSMI_STATUS_OUT_OF_RESOURCES,          //!< Unable to acquire memory or other
-                                         //!< resource
-  RSMI_STATUS_INTERNAL_EXCEPTION,        //!< An internal exception was caught
-  RSMI_STATUS_INPUT_OUT_OF_BOUNDS,       //!< The provided input is out of
-                                         //!< allowable or safe range
-  RSMI_STATUS_INIT_ERROR,                //!< An error occurred when rsmi
-                                         //!< initializing internal data
-                                         //!< structures
-  RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR,
-  RSMI_STATUS_NOT_YET_IMPLEMENTED,       //!< The requested function has not
-                                         //!< yet been implemented in the
-                                         //!< current system for the current
-                                         //!< devices
-  RSMI_STATUS_NOT_FOUND,                 //!< An item was searched for but not
-                                         //!< found
-  RSMI_STATUS_INSUFFICIENT_SIZE,         //!< Not enough resources were
-                                         //!< available for the operation
-  RSMI_STATUS_INTERRUPT,                 //!< An interrupt occurred during
-                                         //!< execution of function
-  RSMI_STATUS_UNEXPECTED_SIZE,           //!< An unexpected amount of data
-                                         //!< was read
-  RSMI_STATUS_NO_DATA,                   //!< No data was found for a given
-                                         //!< input
-  RSMI_STATUS_UNEXPECTED_DATA,           //!< The data read or provided to
-                                         //!< function is not what was expected
-  RSMI_STATUS_BUSY,                      //!< A resource or mutex could not be
-                                         //!< acquired because it is already
-                                         //!< being used
-  RSMI_STATUS_REFCOUNT_OVERFLOW,          //!< An internal reference counter
-                                         //!< exceeded INT32_MAX
-
-  RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF,  //!< An unknown error occurred
-} rsmi_status_t;
-
-/**
- * Types for IO Link returned from rocm_smi
- */
-typedef enum _RSMI_IO_LINK_TYPE {
-  RSMI_IOLINK_TYPE_UNDEFINED      = 0,          //!< unknown type.
-  RSMI_IOLINK_TYPE_PCIEXPRESS     = 1,          //!< PCI Express
-  RSMI_IOLINK_TYPE_XGMI           = 2,          //!< XGMI
-  RSMI_IOLINK_TYPE_NUMIOLINKTYPES,              //!< Number of IO Link types
-  RSMI_IOLINK_TYPE_SIZE           = 0xFFFFFFFF  //!< Max of IO Link types
-} RSMI_IO_LINK_TYPE;
-
-/**
- * Types for IO Link returned from rocm runtime
- */
-typedef enum {
-  /**
-  * Hyper-transport bus type.
-  */
-  HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0,
-  /**
-  * QPI bus type.
-  */
-  HSA_AMD_LINK_INFO_TYPE_QPI = 1,
-  /**
-  * PCIe bus type.
-  */
-  HSA_AMD_LINK_INFO_TYPE_PCIE = 2,
-  /**
-  * Infiniband bus type.
-  */
-  HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3,
-  /**
-  * xGMI link type.
-  */
-  HSA_AMD_LINK_INFO_TYPE_XGMI = 4
-} hsa_amd_link_info_type_t;
-
-#endif  // _HIP_DIRTEST_P2PLINKTYPEHOP_H_
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#ifndef _HIP_DIRTEST_P2PLINKTYPEHOP_H_
+#define _HIP_DIRTEST_P2PLINKTYPEHOP_H_
+/**
+ * rocm_smi.h enums
+ */
+typedef enum {
+  RSMI_STATUS_SUCCESS = 0x0,             //!< Operation was successful
+  RSMI_STATUS_INVALID_ARGS,              //!< Passed in arguments are not valid
+  RSMI_STATUS_NOT_SUPPORTED,             //!< The requested information or
+                                         //!< action is not available for the
+                                         //!< given input, on the given system
+  RSMI_STATUS_FILE_ERROR,                //!< Problem accessing a file. This
+                                         //!< may because the operation is not
+                                         //!< supported by the Linux kernel
+                                         //!< version running on the executing
+                                         //!< machine
+  RSMI_STATUS_PERMISSION,                //!< Permission denied/EACCESS file
+                                         //!< error. Many functions require
+                                         //!< root access to run.
+  RSMI_STATUS_OUT_OF_RESOURCES,          //!< Unable to acquire memory or other
+                                         //!< resource
+  RSMI_STATUS_INTERNAL_EXCEPTION,        //!< An internal exception was caught
+  RSMI_STATUS_INPUT_OUT_OF_BOUNDS,       //!< The provided input is out of
+                                         //!< allowable or safe range
+  RSMI_STATUS_INIT_ERROR,                //!< An error occurred when rsmi
+                                         //!< initializing internal data
+                                         //!< structures
+  RSMI_INITIALIZATION_ERROR = RSMI_STATUS_INIT_ERROR,
+  RSMI_STATUS_NOT_YET_IMPLEMENTED,       //!< The requested function has not
+                                         //!< yet been implemented in the
+                                         //!< current system for the current
+                                         //!< devices
+  RSMI_STATUS_NOT_FOUND,                 //!< An item was searched for but not
+                                         //!< found
+  RSMI_STATUS_INSUFFICIENT_SIZE,         //!< Not enough resources were
+                                         //!< available for the operation
+  RSMI_STATUS_INTERRUPT,                 //!< An interrupt occurred during
+                                         //!< execution of function
+  RSMI_STATUS_UNEXPECTED_SIZE,           //!< An unexpected amount of data
+                                         //!< was read
+  RSMI_STATUS_NO_DATA,                   //!< No data was found for a given
+                                         //!< input
+  RSMI_STATUS_UNEXPECTED_DATA,           //!< The data read or provided to
+                                         //!< function is not what was expected
+  RSMI_STATUS_BUSY,                      //!< A resource or mutex could not be
+                                         //!< acquired because it is already
+                                         //!< being used
+  RSMI_STATUS_REFCOUNT_OVERFLOW,          //!< An internal reference counter
+                                         //!< exceeded INT32_MAX
+
+  RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF,  //!< An unknown error occurred
+} rsmi_status_t;
+
+/**
+ * Types for IO Link returned from rocm_smi
+ */
+typedef enum _RSMI_IO_LINK_TYPE {
+  RSMI_IOLINK_TYPE_UNDEFINED      = 0,          //!< unknown type.
+  RSMI_IOLINK_TYPE_PCIEXPRESS     = 1,          //!< PCI Express
+  RSMI_IOLINK_TYPE_XGMI           = 2,          //!< XGMI
+  RSMI_IOLINK_TYPE_NUMIOLINKTYPES,              //!< Number of IO Link types
+  RSMI_IOLINK_TYPE_SIZE           = 0xFFFFFFFF  //!< Max of IO Link types
+} RSMI_IO_LINK_TYPE;
+
+/**
+ * Types for IO Link returned from rocm runtime
+ */
+typedef enum {
+  /**
+  * Hyper-transport bus type.
+  */
+  HSA_AMD_LINK_INFO_TYPE_HYPERTRANSPORT = 0,
+  /**
+  * QPI bus type.
+  */
+  HSA_AMD_LINK_INFO_TYPE_QPI = 1,
+  /**
+  * PCIe bus type.
+  */
+  HSA_AMD_LINK_INFO_TYPE_PCIE = 2,
+  /**
+  * Infiniband bus type.
+  */
+  HSA_AMD_LINK_INFO_TYPE_INFINBAND = 3,
+  /**
+  * xGMI link type.
+  */
+  HSA_AMD_LINK_INFO_TYPE_XGMI = 4
+} hsa_amd_link_info_type_t;
+
+#endif  // _HIP_DIRTEST_P2PLINKTYPEHOP_H_
diff --git a/projects/hip-tests/catch/unit/rtc/RtcFunctions.cpp b/projects/hip-tests/catch/unit/rtc/RtcFunctions.cpp
index 14ea47e6c2..8de295783f 100644
--- a/projects/hip-tests/catch/unit/rtc/RtcFunctions.cpp
+++ b/projects/hip-tests/catch/unit/rtc/RtcFunctions.cpp
@@ -1,3300 +1,3300 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-This file contains functions for idividual HIPRTC supported compiler options
-validation. For PASS senario the function returns 1 or 0 otherwise.
-*/
-
-#include <hip/hiprtc.h>
-#include <hip/hip_runtime.h>
-#include <hip/hip_fp16.h>
-#include <picojson.h>
-#include <fstream>
-#include <vector>
-#include <string>
-#include <iostream>
-#include "headers/RtcUtility.h"
-#include "headers/RtcFunctions.h"
-#include "headers/RtcKernels.h"
-#include <hip_test_common.hh>
-#include "headers/printf_common.h"
-
-#pragma clang diagnostic ignored "-Wunused-parameter"
-#pragma clang diagnostic ignored "-Wunused-variable"
-
-bool check_architecture(const char** Combination_CO,
-                        int Combination_CO_size, int max_thread_pos,
-                        int fast_math_present) {
-  std::string block_name = "architecture";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string retrieved_CO = get_string_parameters("compiler_option",
-                                                   block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  hipDeviceProp_t prop;
-  HIP_CHECK(hipGetDeviceProperties(&prop, 0));
-  std::string actual_architecture = prop.gcnArchName;
-  std::string complete_CO = retrieved_CO + actual_architecture;
-  const char* compiler_option = complete_CO.c_str();
-  hiprtcProgram prog;
-  HIPRTC_CHECK(hiprtcCreateProgram(&prog, max_thread_string,
-                                           kername, 0, NULL, NULL));
-  if (Combination_CO_size != -1) {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog,
-                                                    Combination_CO_size,
-                                                    Combination_CO)};
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler Option : " << compiler_option);
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  } else {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
-                                                   &compiler_option)};
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler Option : " << compiler_option);
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  }
-  return 1;
-}
-
-bool check_rdc(const char** Combination_CO, int Combination_CO_size,
-               int max_thread_pos, int fast_math_present) {
-  std::string block_name = "rdc";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string CO = get_string_parameters("compiler_option",
-                                                  block_name);
-  if (CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  const char* compiler_opt = CO.c_str();
-  float *A_d, *B_d, *C_d;
-  float *A_h, *B_h, *C_h, *result;
-  float Nbytes = sizeof(float);
-  A_h = new float[1];
-  B_h = new float[1];
-  C_h = new float[1];
-  result = new float[1];
-  for (int i = 0; i < 1; i++) {
-    A_h[i] = 4;
-    B_h[i] = 4;
-    result[i] = 16;
-  }
-  HIP_CHECK(hipMalloc(&A_d, Nbytes));
-  HIP_CHECK(hipMalloc(&B_d, Nbytes));
-  HIP_CHECK(hipMalloc(&C_d, Nbytes));
-  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
-  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
-  hiprtcProgram prog;
-  HIPRTC_CHECK(hiprtcCreateProgram(&prog, rdc_string, kername, 0, NULL, NULL));
-  if (Combination_CO_size != -1) {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
-                                                    Combination_CO)};
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler Option : " << compiler_opt);
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  } else {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, &compiler_opt)};
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler Option : " << compiler_opt);
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  }
-  void* kernelParam[] = {A_d, B_d, C_d};
-  auto size = sizeof(kernelParam);
-  void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
-                              HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
-                              HIP_LAUNCH_PARAM_END};
-  size_t codeSize;
-  HIPRTC_CHECK(hiprtcGetBitcodeSize(prog, &codeSize));
-  std::vector<char> codec(codeSize);
-  HIPRTC_CHECK(hiprtcGetBitcode(prog, codec.data()));
-  float wall_time;
-  int reg_count = 2;
-  int max_thread = 1;
-  unsigned int log_size = 5120;
-  char error_log[5120];
-  char info_log[5120];
-  std::vector<hiprtcJIT_option> jit_options = {HIPRTC_JIT_MAX_REGISTERS,
-                                               HIPRTC_JIT_THREADS_PER_BLOCK,
-                                               HIPRTC_JIT_WALL_TIME,
-                                               HIPRTC_JIT_INFO_LOG_BUFFER,
-                                       HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
-                                               HIPRTC_JIT_ERROR_LOG_BUFFER,
-                                       HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
-                                               HIPRTC_JIT_LOG_VERBOSE};
-  const void* lopts[] = {reinterpret_cast<void*>(&reg_count),
-                         reinterpret_cast<void*>(&max_thread),
-                         reinterpret_cast<void*>(&wall_time),
-                         info_log,
-                         reinterpret_cast<void*>(log_size),
-                         error_log,
-                         reinterpret_cast<void*>(log_size),
-                         reinterpret_cast<void*>(1)};
-  hiprtcLinkState rtc_link_state;
-  void* binary;
-  size_t binarySize;
-  int pass_count = 0;
-  hipModule_t module;
-  hipFunction_t function;
-  for (int i = 0; i < 2; i++) {
-    switch (i) {
-      case 0 :
-        HIPRTC_CHECK(hiprtcLinkCreate(0, nullptr, nullptr, &rtc_link_state));
-        HIPRTC_CHECK(hiprtcLinkAddData(rtc_link_state,
-                                       HIPRTC_JIT_INPUT_LLVM_BITCODE,
-                                       codec.data(), codeSize, 0, 0, 0, 0));
-        HIPRTC_CHECK(hiprtcLinkComplete(rtc_link_state, &binary, &binarySize));
-        HIP_CHECK(hipModuleLoadData(&module, binary));
-        HIP_CHECK(hipModuleGetFunction(&function, module, kername));
-        HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0,
-                                        nullptr, kernel_parameter));
-        pass_count++;
-        break;
-      case 1 :
-        HIPRTC_CHECK(hiprtcLinkCreate(8, jit_options.data(),
-                                      reinterpret_cast<void**>(&lopts),
-                                      &rtc_link_state));
-        HIPRTC_CHECK(hiprtcLinkAddData(rtc_link_state,
-                                       HIPRTC_JIT_INPUT_LLVM_BITCODE,
-                                       codec.data(), codeSize, 0, 0, 0, 0));
-        HIPRTC_CHECK(hiprtcLinkComplete(rtc_link_state, &binary, &binarySize));
-        HIP_CHECK(hipModuleLoadData(&module, binary));
-        HIP_CHECK(hipModuleGetFunction(&function, module, kername));
-        HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0,
-                                        nullptr, kernel_parameter));
-        pass_count++;
-        break;
-      default:
-        WARN(" NOT VALID INPUT ");
-        break;
-    }
-  }
-  HIP_CHECK(hipMemcpy(result, C_d, Nbytes, hipMemcpyDeviceToHost));
-  for (int i = 0 ; i< 1; i++) {
-    if (result[i] != ((A_h[i] * B_h[i]))) {
-      WARN("Compiler Option : " << compiler_opt);
-      WARN("EXPECTED RESULT DOES NOT MATCH ");
-      WARN("INPUT A & B : " << A_h[i] <<" , "<< B_h[i]);
-      WARN("EXPECTED RES : " << (A_h[i] * B_h[i]));
-      WARN("OBTAINED RES : " << result[i]);
-      return 0;
-    }
-  }
-  if (pass_count == 2) {
-    return 1;
-  } else {
-    WARN(" pass_count IS NOT MATCHING ");
-    return 0;
-  }
-}
-
-bool check_denormals_enabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "denormals";
-  std::string retrieved_CO = get_string_parameters("compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  picojson::array Input_Vals = get_array_parameters("Input_Vals", block_name);
-  picojson::array Expected_Results = get_array_parameters("Expected_Results",
-                                                          block_name);
-  const char* kername = kernel_name.c_str();
-  const char* compiler_option = retrieved_CO.c_str();
-  std::vector<double> double_vec_input;
-  for (auto& indx : Input_Vals) {
-    double_vec_input.push_back(indx.get<double>());
-  }
-  std::vector<int> Input_Vals_int;
-  for (auto& indx : double_vec_input) {
-    Input_Vals_int.push_back(static_cast<int>(indx));
-  }
-  std::vector<double> double_vec_expected;
-  for (auto& indx : Expected_Results) {
-    double_vec_expected.push_back(indx.get<double>());
-  }
-  std::vector<int> Expected_Results_int;
-  for (auto& indx : double_vec_expected) {
-    Expected_Results_int.push_back(static_cast<int>(indx));
-  }
-  int test_case, res_inc;
-  for (test_case = 0, res_inc = 0; test_case < Input_Vals_int.size() &&
-       res_inc < Expected_Results_int.size(); test_case+=2, res_inc++) {
-    double *base_h, *power_h, *result_h;
-    double *base_d, *power_d, *result_d;
-    double Nbytes = sizeof(double);
-    base_h = new double[1];
-    power_h = new double[1];
-    result_h = new double[1];
-    *base_h = Input_Vals_int[test_case];
-    *power_h = Input_Vals_int[test_case+1];
-    *result_h = 1;
-    HIP_CHECK(hipMalloc(&base_d, Nbytes));
-    HIP_CHECK(hipMalloc(&power_d, Nbytes));
-    HIP_CHECK(hipMalloc(&result_d, Nbytes));
-    HIP_CHECK(hipMemcpy(base_d, base_h, Nbytes, hipMemcpyHostToDevice));
-    HIP_CHECK(hipMemcpy(power_d, power_h, Nbytes, hipMemcpyHostToDevice));
-    HIP_CHECK(hipMemcpy(result_d, result_h, Nbytes, hipMemcpyHostToDevice));
-    hiprtcProgram program;
-    HIPRTC_CHECK(hiprtcCreateProgram(&program, denormals_string,
-                                                  "denormals", 0, NULL, NULL));
-    if (Combination_CO_size != -1) {
-      hiprtcResult compileResult{hiprtcCompileProgram(program,
-                                                      Combination_CO_size,
-                                                      Combination_CO)};
-      if (!(compileResult == HIPRTC_SUCCESS)) {
-        WARN("Compiler Option : " << compiler_option);
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-        WARN("hiprtcCompileProgram() api failed!! with error code: ");
-        WARN(compileResult);
-        size_t logSize;
-        HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize));
-        if (logSize) {
-          std::string log(logSize, '\0');
-          HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0]));
-          WARN(log);
-        }
-        return 0;
-      }
-    } else {
-      hiprtcResult compileResult{hiprtcCompileProgram(program, 1,
-                                                    &compiler_option)};
-      if (!(compileResult == HIPRTC_SUCCESS)) {
-        WARN("Compiler Option : " << compiler_option);
-        WARN("hiprtcCompileProgram() api failed!! with error code: ");
-        WARN(compileResult);
-        size_t logSize;
-        HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize));
-        if (logSize) {
-          std::string log(logSize, '\0');
-          HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0]));
-          WARN(log);
-        }
-        return 0;
-      }
-    }
-    size_t codeSize;
-    HIPRTC_CHECK(hiprtcGetCodeSize(program, &codeSize));
-    std::vector<char> codec(codeSize);
-    HIPRTC_CHECK(hiprtcGetCode(program, codec.data()));
-    void* kernelParam[] = {base_d, power_d, result_d};
-    auto size = sizeof(kernelParam);
-    void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
-                        HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
-                        HIP_LAUNCH_PARAM_END};
-    hipModule_t module;
-    hipFunction_t function;
-    HIP_CHECK(hipModuleLoadData(&module, codec.data()));
-    HIP_CHECK(hipModuleGetFunction(&function, module, kername));
-    HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0,
-                                              nullptr, kernel_parameter));
-    HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(double),
-                        hipMemcpyDeviceToHost));
-    HIP_CHECK(hipDeviceSynchronize());
-    HIP_CHECK(hipModuleUnload(module));
-    HIPRTC_CHECK(hiprtcDestroyProgram(&program));
-    if (*result_h != Expected_Results_int[res_inc]) {
-      WARN("Compiler Option : " << compiler_option);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("EXPECTED RESULT DOES NOT MATCH FOR " << res_inc);
-      WARN("th ITERATION (start iteration is 0 ) ");
-      WARN("INPUT : pow(2, " << *power_h << ") ");
-      WARN("EXPECTED OP: " << Expected_Results_int[res_inc]);
-      WARN("OBTAINED OP: " << *result_h);
-      return 0;
-    }
-  }
-  return 1;
-}
-
-bool check_denormals_disabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "denormals";
-  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  picojson::array Input_Vals = get_array_parameters("Input_Vals", block_name);
-  picojson::array Expected_Results_for_no = get_array_parameters(
-                                        "Expected_Results_for_no", block_name);
-  const char* kername = kernel_name.c_str();
-  const char* compiler_option = retrieved_CO.c_str();
-  std::vector<double> double_vec_input;
-  for (auto& indx : Input_Vals) {
-    double_vec_input.push_back(indx.get<double>());
-  }
-  std::vector<int> Input_Vals_int;
-  for (auto& indx : double_vec_input) {
-    Input_Vals_int.push_back(static_cast<int>(indx));
-  }
-  std::vector<double> double_vec_expected_for_no;
-  for (auto& indx : Expected_Results_for_no) {
-    double_vec_expected_for_no.push_back(indx.get<double>());
-  }
-  std::vector<int> Expected_Results_for_no_int;
-  for (auto& indx : double_vec_expected_for_no) {
-    Expected_Results_for_no_int.push_back(static_cast<int>(indx));
-  }
-  int test_case, res_inc;
-  for (test_case = 0, res_inc = 0; test_case < Input_Vals_int.size() &&
-       res_inc < Expected_Results_for_no_int.size(); test_case+=2, res_inc++) {
-    double *base_h, *power_h, *result_h;
-    double *base_d, *power_d, *result_d;
-    double Nbytes = sizeof(double);
-    base_h = new double[1];
-    power_h = new double[1];
-    result_h = new double[1];
-    *base_h = Input_Vals_int[test_case];
-    *power_h = Input_Vals_int[test_case+1];
-    *result_h = 0;
-    HIP_CHECK(hipMalloc(&base_d, Nbytes));
-    HIP_CHECK(hipMalloc(&power_d, Nbytes));
-    HIP_CHECK(hipMalloc(&result_d, Nbytes));
-    HIP_CHECK(hipMemcpy(base_d, base_h, Nbytes, hipMemcpyHostToDevice));
-    HIP_CHECK(hipMemcpy(power_d, power_h, Nbytes, hipMemcpyHostToDevice));
-    HIP_CHECK(hipMemcpy(result_d, result_h, Nbytes, hipMemcpyHostToDevice));
-    hiprtcProgram program;
-    HIPRTC_CHECK(hiprtcCreateProgram(&program, denormals_string,
-                                                  "denormals", 0, NULL, NULL));
-    if (Combination_CO_size != -1) {
-      hiprtcResult compileResult{hiprtcCompileProgram(program,
-                                                      Combination_CO_size,
-                                                      Combination_CO)};
-      if (!(compileResult == HIPRTC_SUCCESS)) {
-        WARN("Compiler Option : " << compiler_option);
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-        WARN("hiprtcCompileProgram() api failed!! with error code: ");
-        WARN(compileResult);
-        size_t logSize;
-        HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize));
-        if (logSize) {
-          std::string log(logSize, '\0');
-          HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0]));
-          WARN(log);
-        }
-        return 0;
-      }
-    } else {
-      hiprtcResult compileResult{hiprtcCompileProgram(program, 1,
-                                                    &compiler_option)};
-      if (!(compileResult == HIPRTC_SUCCESS)) {
-        WARN("Compiler Option : " << compiler_option);
-        WARN("hiprtcCompileProgram() api failed!! with error code: ");
-        WARN(compileResult);
-        size_t logSize;
-        HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize));
-        if (logSize) {
-          std::string log(logSize, '\0');
-          HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0]));
-          WARN(log);
-        }
-        return 0;
-      }
-    }
-    size_t codeSize;
-    HIPRTC_CHECK(hiprtcGetCodeSize(program, &codeSize));
-    std::vector<char> codec(codeSize);
-    HIPRTC_CHECK(hiprtcGetCode(program, codec.data()));
-    void* kernelParam[] = {base_d, power_d, result_d};
-    auto size = sizeof(kernelParam);
-    void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
-                        HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
-                        HIP_LAUNCH_PARAM_END};
-    hipModule_t module;
-    hipFunction_t function;
-    HIP_CHECK(hipModuleLoadData(&module, codec.data()));
-    HIP_CHECK(hipModuleGetFunction(&function, module, kername));
-    HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0,
-                                              nullptr, kernel_parameter));
-    HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(double),
-                        hipMemcpyDeviceToHost));
-    HIP_CHECK(hipDeviceSynchronize());
-    HIP_CHECK(hipModuleUnload(module));
-    HIPRTC_CHECK(hiprtcDestroyProgram(&program));
-    if (*result_h != Expected_Results_for_no_int[res_inc]) {
-      WARN("Compiler Option : " << compiler_option);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("EXPECTED RESULT DOES NOT MATCH FOR " << res_inc);
-      WARN("th ITERATION (start iteration is 0 ) ");
-      WARN("INPUT : pow(2, " << *power_h << ") ");
-      WARN("EXPECTED OP: "<< Expected_Results_for_no_int[res_inc]);
-      WARN("OBTAINED OP: "<< *result_h);
-      return 0;
-    }
-  }
-  return 1;
-}
-
-bool check_ffp_contract_off(const char** Combination_CO,
-                           int Combination_CO_size, int max_thread_pos,
-                           int fast_math_present) {
-  std::string block_name = "ffp_contract";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  picojson::array retrieved_CO = get_array_parameters("compiler_option",
-                                                          block_name);
-  if (retrieved_CO.size() < 3) {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::vector<std::string> CO_vec;
-  for (auto& indx : retrieved_CO) {
-    CO_vec.push_back(indx.get<std::string>());
-  }
-  int CO_IRadded_size = 3;
-  const char** CO_IRadded = new const char*[3];
-  std::string hold = CO_vec[0];
-  CO_IRadded[0] = hold.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                 Combination_CO, Combination_CO_size);
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO[0]);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (data.find("fmul contract") != -1 &&
-      data.find("@llvm.fmuladd.f32") != -1) {
-    WARN("Compiler option : " << retrieved_CO[0]);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR CONTAIN EITHER");
-    WARN("'fmul contract' or '@llvm.fmuladd.f32' or both ");
-    WARN("WHICH IS NOT EXPECTED");
-    return 0;
-  } else {
-    return 1;
-  }
-}
-
-bool check_ffp_contract_on(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "ffp_contract";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  picojson::array retrieved_CO = get_array_parameters("compiler_option",
-                                                          block_name);
-  if (retrieved_CO.size() < 3) {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::vector<std::string> CO_vec;
-  for (auto& indx : retrieved_CO) {
-    CO_vec.push_back(indx.get<std::string>());
-  }
-  int CO_IRadded_size = 3;
-  const char** CO_IRadded = new const char*[3];
-  std::string hold = CO_vec[1];
-  CO_IRadded[0] = hold.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  std::string data = checking_IR(kername, CO_IRadded,
-                                 CO_IRadded_size, Combination_CO,
-                                 Combination_CO_size);
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO[1]);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (fast_math_present!= -1) {
-    if (fast_math_present == 0 && data.find("@llvm.fmuladd.f32")!= -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO[1]);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN '@llvm.fmuladd.f32' ");
-      return 0;
-    }
-  } else {
-    if (data.find("@llvm.fmuladd.f32") != -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO[1]);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN '@llvm.fmuladd.f32' ");
-      return 0;
-    }
-  }
-}
-
-bool check_ffp_contract_fast(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "ffp_contract";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  picojson::array retrieved_CO = get_array_parameters("compiler_option",
-                                                          block_name);
-  if (retrieved_CO.size() < 3) {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::vector<std::string> CO_vec;
-  for (auto& indx : retrieved_CO) {
-    CO_vec.push_back(indx.get<std::string>());
-  }
-  int CO_IRadded_size = 3;
-  const char** CO_IRadded = new const char*[3];
-  std::string hold = CO_vec[2];
-  CO_IRadded[0] = hold.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                 Combination_CO, Combination_CO_size);
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO[2]);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (fast_math_present!= -1) {
-    if (fast_math_present == 1 && data.find("contract")!= -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO[2]);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN 'fmul contract' ");
-      return 0;
-    }
-  } else {
-    if (data.find("fmul contract") != -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO[2]);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN 'fmul contract' ");
-      return 0;
-    }
-  }
-}
-
-bool check_fast_math_enabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "fast_math";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string retrieved_CO = get_string_parameters("compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  int CO_IRadded_size = 3;
-  const char** CO_IRadded = new const char*[3];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                 Combination_CO, Combination_CO_size);
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (data.find("fmul fast")!= -1) {
-    return 1;
-  } else {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR DOESN'T CONTAIN 'fmul fast' ");
-    return 0;
-  }
-}
-
-bool check_fast_math_disabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "fast_math";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  int CO_IRadded_size = 3;
-  const char** CO_IRadded = new const char*[3];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                 Combination_CO, Combination_CO_size);
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (data.find("fmul fast")!= -1) {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR DOESN'T CONTAIN 'fmul fast' ");
-    return 0;
-  } else {
-    return 1;
-  }
-}
-
-bool check_slp_vectorize_enabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "slp_vectorize";
-  std::string retrieved_CO = get_string_parameters("compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  int CO_IRadded_size = 3;
-  const char** CO_IRadded = new const char*[3];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  __half2 *a_d, *x_d, *y_d;
-  __half2 a_h, x_h;
-  a_h.data.x = 1.5;
-  x_h.data.y = 3.0;
-  CaptureStream capture(stderr);
-  HIP_CHECK(hipMalloc(&a_d, sizeof(__half2)));
-  HIP_CHECK(hipMalloc(&x_d, sizeof(__half2)));
-  HIP_CHECK(hipMalloc(&y_d, sizeof(__half2)));
-  HIP_CHECK(hipMemcpy(a_d, &a_h, sizeof(__half2), hipMemcpyHostToDevice));
-  HIP_CHECK(hipMemcpy(x_d, &x_h, sizeof(__half2), hipMemcpyHostToDevice));
-  hiprtcProgram prog;
-  HIPRTC_CHECK(hiprtcCreateProgram(&prog, slp_vectorize_string,
-                                                kername, 0, NULL, NULL));
-  if (Combination_CO_size != -1) {
-    int Combination_CO_IRadded_size = Combination_CO_size+3;
-    int b = 0;
-    std::vector<std::string> add_ir_forcombi(Combination_CO_size + 3, "");
-    const char** Combination_CO_IRadded =
-                                       new const char*[Combination_CO_size+3];
-    for (int i = 0; i < Combination_CO_size+3; ++i) {
-      if (i == Combination_CO_size) {
-        Combination_CO_IRadded[i] = "-fno-signed-zeros";
-        Combination_CO_IRadded[i+1] = "-mllvm";
-        Combination_CO_IRadded[i+2] = "-print-after=constmerge";
-        break;
-      }
-      add_ir_forcombi[i] = Combination_CO[b];
-      Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str();
-      b++;
-    }
-    capture.Begin();
-    hiprtcResult compileResult{hiprtcCompileProgram(prog,
-                                               Combination_CO_IRadded_size,
-                                               Combination_CO_IRadded)};
-    capture.End();
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler option : " << retrieved_CO);
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size+3; i++) {
-        WARN(Combination_CO_IRadded[i]);
-      }
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  } else {
-    capture.Begin();
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, CO_IRadded_size,
-                                               CO_IRadded)};
-    capture.End();
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler option : " << retrieved_CO);
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  }
-  std::string data = capture.getData();
-  std::stringstream dataStream;
-  size_t codeSize;
-  HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
-  std::vector<char> codec(codeSize);
-  HIPRTC_CHECK(hiprtcGetCode(prog, codec.data()));
-  void* kernelParam[] = {reinterpret_cast<void*>(a_d),
-                         reinterpret_cast<void*>(x_d),
-                         reinterpret_cast<void*>(y_d)};
-  auto size = sizeof(kernelParam);
-  void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
-                              HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
-                              HIP_LAUNCH_PARAM_END};
-  hipModule_t module;
-  hipFunction_t function;
-  HIP_CHECK(hipModuleLoadData(&module, codec.data()));
-  HIP_CHECK(hipModuleGetFunction(&function, module, kername));
-  HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr,
-                                  kernel_parameter));
-  HIP_CHECK(hipDeviceSynchronize());
-  HIP_CHECK(hipModuleUnload(module));
-  HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  int times = 0;
-  if (data.find("contract <2 x half>", 0) != -1) {
-    times++;
-  }
-  int start = data.find("contract <2 x half>", 0) + 1;
-  while (data.find("contract <2 x half>", start) != -1) {
-    times++;
-    start = data.find("contract <2 x half>", start)+1;
-  }
-  if (times == 1) {
-    return 1;
-  } else if (times == 0) {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR DOESN'T CONTAIN 'fadd contract <2 x half>' ");
-    return 0;
-  } else {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR CONTAIN 'fadd contract <2 x half>' " << times << "times");
-    WARN(" WHICH IS NOT EXPECTED (IT SHOULD BE PRESENT ONCE)");
-    return 0;
-  }
-}
-
-bool check_slp_vectorize_disabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "slp_vectorize";
-  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  int CO_IRadded_size = 3;
-  const char** CO_IRadded = new const char*[3];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  __half2 *a_d, *x_d, *y_d;
-  __half2 a_h, x_h;
-  a_h.data.x = 1.5;
-  x_h.data.y = 3.0;
-  CaptureStream capture(stderr);
-  HIP_CHECK(hipMalloc(&a_d, sizeof(__half2)));
-  HIP_CHECK(hipMalloc(&x_d, sizeof(__half2)));
-  HIP_CHECK(hipMalloc(&y_d, sizeof(__half2)));
-  HIP_CHECK(hipMemcpy(a_d, &a_h, sizeof(__half2), hipMemcpyHostToDevice));
-  HIP_CHECK(hipMemcpy(x_d, &x_h, sizeof(__half2), hipMemcpyHostToDevice));
-  hiprtcProgram prog;
-  HIPRTC_CHECK(hiprtcCreateProgram(&prog, slp_vectorize_string,
-                                                kername, 0, NULL, NULL));
-  if (Combination_CO_size != -1) {
-    int Combination_CO_IRadded_size = Combination_CO_size+3;
-    int b = 0;
-    std::vector<std::string> add_ir_forcombi(Combination_CO_size + 3, "");
-    const char** Combination_CO_IRadded =
-                                       new const char*[Combination_CO_size+3];
-    for (int i = 0; i < Combination_CO_size+3; ++i) {
-      if (i == Combination_CO_size) {
-        Combination_CO_IRadded[i] = "-fno-signed-zeros";
-        Combination_CO_IRadded[i+1] = "-mllvm";
-        Combination_CO_IRadded[i+2] = "-print-after=constmerge";
-        break;
-      }
-      add_ir_forcombi[i] = Combination_CO[b];
-      Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str();
-      b++;
-    }
-    capture.Begin();
-    hiprtcResult compileResult{hiprtcCompileProgram(prog,
-                                               Combination_CO_IRadded_size,
-                                               Combination_CO_IRadded)};
-    capture.End();
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler option : " << retrieved_CO);
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size+3; i++) {
-        WARN(Combination_CO_IRadded[i]);
-      }
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  } else {
-    capture.Begin();
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, CO_IRadded_size,
-                                               CO_IRadded)};
-    capture.End();
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler option : " << retrieved_CO);
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  }
-  std::string data = capture.getData();
-  std::stringstream dataStream;
-  size_t codeSize;
-  HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
-  std::vector<char> codec(codeSize);
-  HIPRTC_CHECK(hiprtcGetCode(prog, codec.data()));
-  void* kernelParam[] = {reinterpret_cast<void*>(a_d),
-                         reinterpret_cast<void*>(x_d),
-                         reinterpret_cast<void*>(y_d)};
-  auto size = sizeof(kernelParam);
-  void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
-                              HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
-                              HIP_LAUNCH_PARAM_END};
-  hipModule_t module;
-  hipFunction_t function;
-  HIP_CHECK(hipModuleLoadData(&module, codec.data()));
-  HIP_CHECK(hipModuleGetFunction(&function, module, kername));
-  HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr,
-                                  kernel_parameter));
-  HIP_CHECK(hipDeviceSynchronize());
-  HIP_CHECK(hipModuleUnload(module));
-  HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
-  int times = 0;
-  if (data.find("contract <2 x half>", 0) != -1) {
-    times++;
-  }
-  int start = data.find("contract <2 x half>", 0) + 1;
-  while (data.find("contract <2 x half>", start) != -1) {
-    times++;
-    start = data.find("contract <2 x half>", start)+1;
-  }
-  if (times == 2) {
-    return 1;
-  } else if (times < 2) {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR CONTAIN 'fadd contract <2 x half>' " << times << "times");
-    WARN(" WHICH IS NOT EXPECTED(IT SHOULD BE PRESENT TWICE)");
-    return 0;
-  } else {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR CONTAIN 'fadd contract <2 x half>' " << times << "times");
-    WARN(" WHICH IS NOT EXPECTED(IT SHOULD BE PRESENT TWICE)");
-    return 0;
-  }
-}
-
-bool check_macro(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "macro";
-  std::string retrieved_CO = get_string_parameters("compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  picojson::array Expected_Results = get_array_parameters("Expected_Results",
-                                                           block_name);
-  const char* kername = kernel_name.c_str();
-  std::vector<double> double_vec_expected;
-  for (auto& indx : Expected_Results) {
-    double_vec_expected.push_back(indx.get<double>());
-  }
-  std::vector<int> Expected_Results_int;
-  for (auto& indx : double_vec_expected) {
-    Expected_Results_int.push_back(static_cast<int>(indx));
-  }
-  const char* compiler_option = retrieved_CO.c_str();
-  hiprtcProgram prog;
-  HIPRTC_CHECK(hiprtcCreateProgram(&prog, macro_string,
-                                                kername, 0, NULL, NULL));
-  if (Combination_CO_size != -1) {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
-                                                    Combination_CO)};
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler Option : " << compiler_option);
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  } else {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
-                                                   &compiler_option)};
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler Option : " << compiler_option);
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  }
-  int *macro_value_h;
-  int *macro_value_d;
-  macro_value_h = new int[1];
-  HIP_CHECK(hipMalloc(&macro_value_d, sizeof(int)));
-  *macro_value_h = 0;
-  HIP_CHECK(hipMemcpy(macro_value_d, macro_value_h, sizeof(int),
-            hipMemcpyHostToDevice));
-  size_t codeSize;
-  HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
-  std::vector<char> codec(codeSize);
-  hiprtcGetCode(prog, codec.data());
-  void* kernelParam[] = {macro_value_d};
-  auto size = sizeof(kernelParam);
-  void* kernel_parameter[]={HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
-                            HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
-                            HIP_LAUNCH_PARAM_END};
-  hipModule_t module;
-  hipFunction_t function;
-  HIP_CHECK(hipModuleLoadData(&module, codec.data()));
-  HIP_CHECK(hipModuleGetFunction(&function, module, kername));
-  HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr,
-                        kernel_parameter));
-  HIP_CHECK(hipMemcpy(macro_value_h, macro_value_d, sizeof(int),
-                      hipMemcpyDeviceToHost));
-  HIP_CHECK(hipDeviceSynchronize());
-  HIP_CHECK(hipModuleUnload(module));
-  HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
-  if (*macro_value_h != Expected_Results_int[0]) {
-    WARN("Compiler Option : " << compiler_option);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("EXPECTED RESULT DOES NOT MATCH");
-    WARN("INPUT: " << compiler_option);
-    WARN("EXPECTED OP : "<< Expected_Results_int[0]);
-    WARN("OBTAINED OP: "<< *macro_value_h);
-    return 0;
-  } else {
-    return 1;
-  }
-}
-
-bool check_undef_macro(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "undef_macro";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  picojson::array comp_opt = get_array_parameters("compiler_option",
-                                                  block_name);
-  if (comp_opt.size() < 2) {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::vector<std::string> compiler_option;
-  for (auto& indx : comp_opt) {
-    compiler_option.push_back(indx.get<std::string>());
-  }
-  std::vector<std::string> variable(compiler_option.size(), "");
-  const char** appended_compiler_options =
-                                     new const char*[compiler_option.size()];
-  for (int i = 0; i < compiler_option.size(); ++i) {
-    variable[i] = compiler_option[i];
-    appended_compiler_options[i] = variable[i].c_str();
-  }
-  hiprtcProgram prog;
-  HIPRTC_CHECK(hiprtcCreateProgram(&prog, undef_macro_string,
-                                                 kername, 0, NULL, NULL));
-  if (Combination_CO_size != -1) {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
-                                                    Combination_CO)};
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        if (log.find("undeclared identifier")) {
-          return 1;
-        }
-      } else {
-        WARN("Compiler Option : " << appended_compiler_options[1]);
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-        WARN("Expected error : 'undeclared identifier' NOT GENERATED");
-        return 0;
-      }
-    }
-  } else {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog,
-                                                    compiler_option.size(),
-                                                 appended_compiler_options)};
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        if (log.find("undeclared identifier")) {
-          return 1;
-        }
-      } else {
-        WARN("Compiler Option : " << appended_compiler_options[0]);
-        if (Combination_CO_size != -1) {
-          WARN("FAILED IN COMBINATION :");
-          for (int i = 0; i < Combination_CO_size; i++) {
-            WARN(Combination_CO[i]);
-          }
-        }
-        WARN("Expected error : 'undeclared identifier' NOT GENERATED");
-        return 0;
-      }
-    }
-  }
-  WARN("Compiler Option : " << appended_compiler_options[0]);
-  if (Combination_CO_size != -1) {
-    WARN("FAILED IN COMBINATION :");
-    for (int i = 0; i < Combination_CO_size; i++) {
-      WARN(Combination_CO[i]);
-    }
-  }
-  WARN("EXPECTED ERROR WAS NOT GENERATED");
-  return 0;
-}
-
-bool check_header_dir(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "header_dir";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string compiler_option = get_string_parameters("compiler_option",
-                                                 block_name);
-  if (compiler_option == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  picojson::array Headers = get_array_parameters("Headers", block_name);
-  picojson::array depending_comp_optn =
-                     get_array_parameters("depending_comp_optn", block_name);
-  picojson::array Src_headers =
-                             get_array_parameters("Src_headers", block_name);
-  picojson::array Input_Thrd_Vals =
-                              get_array_parameters("Input_Vals", block_name);
-  picojson::array Expected_Results =
-                        get_array_parameters("Expected_Results", block_name);
-  std::string str = "pwd";
-  const char *cmd = str.c_str();
-  CaptureStream capture(stdout);
-  capture.Begin();
-  system(cmd);
-  capture.End();
-  std::string wor_dir = capture.getData();
-  std::string break_dir = wor_dir.substr(0, wor_dir.find("build"));
-  std::string append_str = "catch/unit/rtc/headers";
-  std::string CO = compiler_option + " " + break_dir + append_str;
-  const char* appended_CO = CO.c_str();
-  std::vector<std::string> Headers_list;
-  for (auto& indx : Headers) {
-    Headers_list.push_back(indx.get<std::string>());
-  }
-  std::vector<std::string> Src_headers_list;
-  for (auto& indx : Src_headers) {
-    Src_headers_list.push_back(indx.get<std::string>());
-  }
-  std::vector<std::string> depending_co_list;
-  for (auto& indx : depending_comp_optn) {
-    depending_co_list.push_back(indx.get<std::string>());
-  }
-  std::vector<double> double_vec_target;
-  for (auto& indx : Input_Thrd_Vals) {
-    double_vec_target.push_back(indx.get<double>());
-  }
-  std::vector<int> Input_Thrd_Vals_int;
-  for (auto& indx : double_vec_target) {
-    Input_Thrd_Vals_int.push_back(static_cast<int>(indx));
-  }
-  std::vector<double> double_vec_expected;
-  for (auto& indx : Expected_Results) {
-    double_vec_expected.push_back(indx.get<double>());
-  }
-  std::vector<int> Expected_Results_int;
-  for (auto& indx : double_vec_expected) {
-    Expected_Results_int.push_back(static_cast<int>(indx));
-  }
-  std::vector<std::string> src_var_hdr_lst(Src_headers_list.size(), "");
-  const char** src_hder_lst = new const char*[Src_headers_list.size()];
-  for (int i = 0; i < Src_headers_list.size(); ++i) {
-    src_var_hdr_lst[i] = Src_headers_list[i];
-    src_hder_lst[i] = src_var_hdr_lst[i].c_str();
-  }
-  std::vector<std::string> var_hdr_lst(Headers_list.size(), "");
-  const char** hder_lst = new const char*[Headers_list.size()];
-  for (int i = 0; i < Headers_list.size(); ++i) {
-    var_hdr_lst[i] = Headers_list[i];
-    hder_lst[i] = var_hdr_lst[i].c_str();
-  }
-  for (int senario = 0; senario< Input_Thrd_Vals_int.size(); senario++) {
-    hiprtcProgram prog;
-    HIPRTC_CHECK(hiprtcCreateProgram(&prog, header_dir_string,
-                                                  kername, Headers_list.size(),
-                                                  src_hder_lst, hder_lst));
-    if (Combination_CO_size != -1) {
-      hiprtcResult compileResult{hiprtcCompileProgram(prog,
-                                                      Combination_CO_size,
-                                                      Combination_CO)};
-      if (!(compileResult == HIPRTC_SUCCESS)) {
-        WARN("Compiler Option : " << appended_CO);
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-        WARN("hiprtcCompileProgram() api failed!! with error code: ");
-        WARN(compileResult);
-        size_t logSize;
-        HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-        if (logSize) {
-          std::string log(logSize, '\0');
-          HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-          WARN(log);
-        }
-        return 0;
-      }
-    } else {
-      hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
-                                                      &appended_CO)};
-      if (!(compileResult == HIPRTC_SUCCESS)) {
-        WARN("Compiler Option : " << appended_CO);
-        WARN("hiprtcCompileProgram() api failed!! with error code: ");
-        WARN(compileResult);
-        size_t logSize;
-        HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-        if (logSize) {
-          std::string log(logSize, '\0');
-          HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-          WARN(log);
-        }
-        return 0;
-      }
-    }
-    size_t codeSize;
-    HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
-    std::vector<char> codec(codeSize);
-    HIPRTC_CHECK(hiprtcGetCode(prog, codec.data()));
-    int value_h = 0;
-    int* ptr_value_h = &value_h;
-    int input_h = Input_Thrd_Vals_int[senario];
-    int* ptr_input_h = &input_h;
-    int* value_d;
-    int* input_d;
-    HIP_CHECK(hipMalloc(&value_d, sizeof(int)));
-    HIP_CHECK(hipMalloc(&input_d, sizeof(int)));
-    HIP_CHECK(hipMemcpy(value_d, ptr_value_h, sizeof(int),
-                        hipMemcpyHostToDevice));
-    HIP_CHECK(hipMemcpy(input_d, ptr_input_h, sizeof(int),
-                        hipMemcpyHostToDevice));
-    void* kernelParam[] = {value_d, input_d};
-    auto size = sizeof(kernelParam);
-    void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
-                                HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
-                                HIP_LAUNCH_PARAM_END};
-    hipModule_t module;
-    hipFunction_t function;
-    HIP_CHECK(hipModuleLoadData(&module, codec.data()));
-    HIP_CHECK(hipModuleGetFunction(&function, module, kername));
-    HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr,
-                          kernel_parameter));
-    HIP_CHECK(hipMemcpy(ptr_value_h, value_d, sizeof(int),
-                        hipMemcpyDeviceToHost));
-    if (*ptr_value_h != Expected_Results_int[senario]) {
-      WARN("Compiler Option : " << appended_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN(" EXPECTED RESULT DOES NOT MATCH FOR " << senario);
-      WARN("th ITERATION (start iteration is 0 ) ");
-      WARN(" INPUT: " << Input_Thrd_Vals_int[senario]);
-      WARN(" EXPECTED OP: "<< Expected_Results_int[senario]);
-      WARN(" OBTAINED OP: "<< *ptr_value_h);
-      return 0;
-    }
-    HIP_CHECK(hipDeviceSynchronize());
-    HIP_CHECK(hipModuleUnload(module));
-    HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
-  }
-  return 1;
-}
-
-bool check_warning(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "warning";
-  std::string retrieved_CO =
-                         get_string_parameters("compiler_option", block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  const char* compiler_option = retrieved_CO.c_str();
-  hiprtcProgram prog;
-  HIPRTC_CHECK(hiprtcCreateProgram(&prog, warning_string, kername,
-                                                0, NULL, NULL));
-  if (Combination_CO_size != -1) {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
-                                                    Combination_CO)};
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler Option : " << compiler_option);
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  } else {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
-                                                    &compiler_option)};
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler Option : " << compiler_option);
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  }
-  size_t logSize;
-  HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-  if (logSize) {
-    std::string log(logSize, '\0');
-    HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-    if (-1 != log.find("#warning")) {
-      WARN("Compiler Option : " << compiler_option);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN(" WARNING MESSAGE IS PRINTING WHICH IS NOT SUPRESSED ");
-      return 0;
-    } else {
-      return 1;
-    }
-  } else {
-    return 1;
-  }
-}
-
-bool check_Rpass_inline(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "Rpass_inline";
-  std::string retrieved_CO =
-                         get_string_parameters("compiler_option", block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  const char* compiler_option = retrieved_CO.c_str();
-  hiprtcProgram prog;
-  HIPRTC_CHECK(hiprtcCreateProgram(&prog, max_thread_string,
-                                                kername, 0, NULL, NULL));
-  if (Combination_CO_size != -1) {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
-                                                    Combination_CO)};
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler Option : " << compiler_option);
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  } else {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
-                                                  &compiler_option)};
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler Option : " << compiler_option);
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  }
-  size_t logSize;
-  HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-  if (logSize) {
-    std::string log(logSize, '\0');
-    HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-    if (log.find("inlined into")) {
-      return 1;
-    } else {
-      WARN("Compiler Option : " << compiler_option);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("EXPECTED STRING 'inlined into' IS NOT PRESENT IN LOG ");
-      return 0;
-    }
-  } else {
-    WARN("Compiler Option : " << compiler_option);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN(" LOG WITH EXPECTED STRING 'inlined into' IS NOT PRESENT ");
-    return 0;
-  }
-}
-
-bool check_conversionerror_enabled(const char** Combination_CO,
-                                int Combination_CO_size, int max_thread_pos,
-                                int fast_math_present) {
-  std::string block_name = "error";
-  picojson::array retrieved_CO = get_array_parameters("compiler_option",
-                                                          block_name);
-  if (retrieved_CO.size() < 4) {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::vector<std::string> CO_vec;
-  for (auto& indx : retrieved_CO) {
-    CO_vec.push_back(indx.get<std::string>());
-  }
-  std::string variable = CO_vec[0];
-  const char* compiler_option = variable.c_str();
-  hiprtcProgram prog;
-  HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string,
-                                                kername, 0, NULL, NULL));
-  if (Combination_CO_size != -1) {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
-                                                    Combination_CO)};
-  } else {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
-                                                     &compiler_option)};
-  }
-  size_t logSize;
-  HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-  if (logSize) {
-    std::string log(logSize, '\0');
-    HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-    std::string variable = "error";
-    if (-1 != log.find(variable)) {
-      return 1;
-    } else {
-      WARN("Compiler Option : " << compiler_option);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("ERROR MSG : '" << variable <<"' NOT FOUND");
-      return 0;
-    }
-  } else {
-    WARN("Compiler Option : " << compiler_option);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("LOG IS NOT GENERATED");
-    WARN("maybe due to presence of '-w' compiler option");
-    return 0;
-  }
-}
-
-bool check_conversionerror_disabled(const char** Combination_CO,
-                                 int Combination_CO_size, int max_thread_pos,
-                                 int fast_math_present) {
-  std::string block_name = "error";
-  picojson::array retrieved_CO = get_array_parameters("compiler_option",
-                                                          block_name);
-  if (retrieved_CO.size() < 4) {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::vector<std::string> CO_vec;
-  for (auto& indx : retrieved_CO) {
-    CO_vec.push_back(indx.get<std::string>());
-  }
-  std::string variable = CO_vec[1];
-  const char* compiler_option = variable.c_str();
-  hiprtcProgram prog;
-  HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string,
-                                                kername, 0, NULL, NULL));
-  if (Combination_CO_size != -1) {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
-                                                    Combination_CO)};
-  } else {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
-                                                     &compiler_option)};
-  }size_t logSize;
-  HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-  if (logSize) {
-    std::string log(logSize, '\0');
-    HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-    if (-1 != log.find("error")) {
-      WARN("Compiler Option : " << compiler_option);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("LOG IS PRESENT WITH ERROR WHICH IS NOT EXPECTED : ");
-      WARN("maybe due to presence of '-w' compiler option");
-      return 0;
-    } else {
-      return 1;
-    }
-  } else {
-    return 1;
-  }
-}
-
-bool check_conversionwarning_enabled(const char** Combination_CO,
-                                   int Combination_CO_size, int max_thread_pos,
-                                   int fast_math_present) {
-  std::string block_name = "error";
-  picojson::array retrieved_CO = get_array_parameters("compiler_option",
-                                                          block_name);
-  if (retrieved_CO.size() < 4) {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::vector<std::string> CO_vec;
-  for (auto& indx : retrieved_CO) {
-    CO_vec.push_back(indx.get<std::string>());
-  }
-  std::string variable = CO_vec[2];
-  const char* compiler_option = variable.c_str();
-  hiprtcProgram prog;
-  HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string,
-                                                kername, 0, NULL, NULL));
-  if (Combination_CO_size != -1) {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
-                                                    Combination_CO)};
-  } else {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
-                                                     &compiler_option)};
-  }size_t logSize;
-  HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-  if (logSize) {
-    std::string log(logSize, '\0');
-    HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-    std::string variable = "warning";
-    if (-1 != log.find(variable)) {
-      return 1;
-    } else {
-      WARN("Compiler Option : " << compiler_option);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("LOG DOESN'T CONTAIN WARNING AS EXP : " << compiler_option);
-      return 0;
-    }
-  } else {
-    WARN("Compiler Option : " << compiler_option);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("LOG IS NOT GENERATED");
-    return 0;
-  }
-}
-
-bool check_conversionwarning_disabled(const char** Combination_CO,
-                                      int Combination_CO_size,
-                                      int max_thread_pos,
-                                      int fast_math_present) {
-  std::string block_name = "error";
-  picojson::array retrieved_CO = get_array_parameters("compiler_option",
-                                                          block_name);
-  if (retrieved_CO.size() < 4) {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::vector<std::string> CO_vec;
-  for (auto& indx : retrieved_CO) {
-    CO_vec.push_back(indx.get<std::string>());
-  }
-  std::string variable = CO_vec[3];
-  const char* compiler_option = variable.c_str();
-  hiprtcProgram prog;
-  HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string,
-                                                kername, 0, NULL, NULL));
-  if (Combination_CO_size != -1) {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
-                                                    Combination_CO)};
-  } else {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
-                                                     &compiler_option)};
-  }size_t logSize;
-  HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-  if (logSize) {
-    std::string log(logSize, '\0');
-    HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-    if (-1 != log.find("warning")) {
-      WARN("Compiler Option : " << compiler_option);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("WARNING IS GENERATED WHICH IS NOT EXPECTED");
-      WARN(compiler_option);
-      return 0;
-    } else {
-      return 1;
-    }
-  } else {
-    return 1;
-  }
-}
-
-bool check_max_thread(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "max_thread";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  std::string default_CO = get_string_parameters("kernel_name", block_name);
-  picojson::array Target_Thrd_Vals = get_array_parameters("Target_Vals",
-                                                          block_name);
-  picojson::array Input_Thrd_Vals = get_array_parameters("Input_Vals",
-                                                          block_name);
-  picojson::array Expected_Results = get_array_parameters("Expected_Results",
-                                                          block_name);
-  const char* kername = kernel_name.c_str();
-  std::string compiler_option = get_string_parameters("compiler_option",
-                                                 block_name);
-  if (compiler_option == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::vector<double> double_vec_target;
-  for (auto& indx : Target_Thrd_Vals) {
-    double_vec_target.push_back(indx.get<double>());
-  }
-  std::vector<int> Target_Thrd_Vals_int;
-  for (auto& indx : double_vec_target) {
-    Target_Thrd_Vals_int.push_back(static_cast<int>(indx));
-  }
-  int a = 0;
-  std::vector<std::string> variable(Target_Thrd_Vals_int.size(), "");
-  const char** appended_compiler_options =
-                                 new const char*[Target_Thrd_Vals_int.size()];
-  for (int i = 0; i < Target_Thrd_Vals_int.size() ; i++) {
-    variable[i] = compiler_option + std::to_string(Target_Thrd_Vals_int[i]);
-    appended_compiler_options[i] = variable[i].c_str();
-  }
-  std::vector<double> double_vec_input;
-  for (auto& indx : Input_Thrd_Vals) {
-    double_vec_input.push_back(indx.get<double>());
-  }
-  std::vector<int> Input_Thrd_Vals_int;
-  for (auto& indx : double_vec_input) {
-    Input_Thrd_Vals_int.push_back(static_cast<int>(indx));
-  }
-  std::vector<double> double_vec_expected;
-  for (auto& indx : Expected_Results) {
-    double_vec_expected.push_back(indx.get<double>());
-  }
-  std::vector<int> Expected_Results_int;
-  for (auto& indx : double_vec_expected) {
-    Expected_Results_int.push_back(static_cast<int>(indx));
-  }
-  int pass_count = 0;
-  int inc = (Input_Thrd_Vals_int.size()/Target_Thrd_Vals_int.size());
-  int start = 0;
-  int check, test_case;
-  for (int senario = 0; senario < Target_Thrd_Vals_int.size(); senario++) {
-    if (Target_Thrd_Vals_int[senario] == 0) {
-      check = 0;
-      for (test_case = start; test_case< (start+inc); test_case++) {
-        if (check == Expected_Results_int[test_case]) {
-          pass_count++;
-        }
-      }
-      start+= inc;
-      continue;
-    }
-    hiprtcProgram prog;
-    HIPRTC_CHECK(hiprtcCreateProgram(&prog, max_thread_string,
-                                                   kername, 0, NULL, NULL));
-    if (Combination_CO_size != -1) {
-      std::string max_thread_string = variable[senario];
-      Combination_CO[max_thread_pos] = max_thread_string.c_str();
-      hiprtcResult compileResult{hiprtcCompileProgram(prog,
-                                                      Combination_CO_size,
-                                                      Combination_CO)};
-      if (!(compileResult == HIPRTC_SUCCESS)) {
-        WARN("Compiler Option : " << appended_compiler_options[senario]);
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-        WARN("hiprtcCompileProgram() api failed!! with error code: ");
-        WARN(compileResult);
-        size_t logSize;
-        HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-        if (logSize) {
-          std::string log(logSize, '\0');
-          HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-          WARN(log);
-        }
-        return 0;
-      }
-    } else {
-      hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
-                                       &appended_compiler_options[senario])};
-      if (!(compileResult == HIPRTC_SUCCESS)) {
-        WARN("Compiler Option : " << appended_compiler_options[senario]);
-        WARN("hiprtcCompileProgram() api failed!! with error code: ");
-        WARN(compileResult);
-        size_t logSize;
-        HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-        if (logSize) {
-          std::string log(logSize, '\0');
-          HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-          WARN(log);
-        }
-        return 0;
-      }
-    }
-    size_t codeSize;
-    HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
-    std::vector<char> codec(codeSize);
-    HIPRTC_CHECK(hiprtcGetCode(prog, codec.data()));
-    for (test_case = start; test_case< (start+inc); test_case++) {
-      int num_threads_h = 0;
-      int* ptr_num_threads_h = &num_threads_h;
-      int* Thread_count_d;
-      HIP_CHECK(hipMalloc(&Thread_count_d, sizeof(int)));
-      HIP_CHECK(hipMemcpy(Thread_count_d, ptr_num_threads_h, sizeof(int),
-                hipMemcpyHostToDevice));
-      void* kernelParam[] = {Thread_count_d};
-      auto size = sizeof(kernelParam);
-      void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
-                          HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
-                          HIP_LAUNCH_PARAM_END};
-      hipModule_t module;
-      hipFunction_t function;
-      HIP_CHECK(hipModuleLoadData(&module, codec.data()));
-      HIP_CHECK(hipModuleGetFunction(&function, module, kername));
-      hipError_t status = hipModuleLaunchKernel(function, 1, 1, 1,
-                                                Input_Thrd_Vals_int[test_case],
-                                                1, 1, 0, 0, nullptr,
-                                                kernel_parameter);
-      HIP_CHECK(hipMemcpy(ptr_num_threads_h, Thread_count_d, sizeof(int),
-                          hipMemcpyDeviceToHost));
-      if ((status == hipSuccess) &&
-          (num_threads_h <= Target_Thrd_Vals_int[senario])) {
-        check = 1;
-      } else {
-        check = 0;
-      }
-      if (check != Expected_Results_int[test_case]) {
-        WARN("Compiler Option : " << appended_compiler_options[senario]);
-        if (Combination_CO_size != -1) {
-          WARN("FAILED IN COMBINATION :");
-          std::string max_thread_string = variable[senario];
-          Combination_CO[max_thread_pos] = max_thread_string.c_str();
-          for (int i = 0; i < Combination_CO_size; i++) {
-            WARN(Combination_CO[i]);
-          }
-        }
-        WARN("EXPECTED RESULT DOES NOT MATCH FOR " << test_case);
-        WARN("th ITERATION (start iteration is 0 ) ");
-        WARN("IP THREAD VAL: " << Input_Thrd_Vals_int[test_case]);
-        WARN("EXPECTED OP: "<< Expected_Results_int[test_case]);
-        WARN("OBTAINED OP: "<< check);
-        return 0;
-      }
-      HIP_CHECK(hipDeviceSynchronize());
-      HIP_CHECK(hipModuleUnload(module));
-    }
-    start+=inc;
-    HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
-  }
-  return 1;
-}
-
-bool check_unsafe_atomic_enabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "unsafe_atomic";
-  std::string compiler_option = get_string_parameters("compiler_option",
-                                                 block_name);
-  if (compiler_option == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  const char *compiler_option_cstr = compiler_option.c_str();
-  float *A_d;
-  const int N = 1000;
-  float A_h[N];
-  float Nbytes = N * sizeof(float);
-  double sum_w = 0, sum_wo = 0, sum_tocheck = 0;
-  for (int i = 0; i < N; i++) {
-    A_h[i] = 0.1f;
-    sum_tocheck += A_h[i] + 0.2f;
-  }
-  HIP_CHECK(hipMalloc(&A_d, Nbytes));
-  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
-  for (int senario = 0; senario < 2; senario ++) {
-    hiprtcProgram prog;
-    HIPRTC_CHECK(hiprtcCreateProgram(&prog, unsafe_atomic_string,
-                                                  kername, 0, NULL, NULL));
-    if (Combination_CO_size != -1) {
-      hiprtcResult compileResult{hiprtcCompileProgram(prog,
-                                                      Combination_CO_size,
-                                                      Combination_CO)};
-      if (!(compileResult == HIPRTC_SUCCESS)) {
-        WARN("Compiler Option : " << compiler_option);
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-        WARN("hiprtcCompileProgram() api failed!! with error code: ");
-        WARN(compileResult);
-        size_t logSize;
-        HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-        if (logSize) {
-          std::string log(logSize, '\0');
-          HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-          WARN(log);
-        }
-        return 0;
-      }
-    } else {
-      hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
-                                                      &compiler_option_cstr)};
-      if (!(compileResult == HIPRTC_SUCCESS)) {
-        WARN("Compiler Option : " << compiler_option);
-        WARN("hiprtcCompileProgram() api failed!! with error code: ");
-        WARN(compileResult);
-          size_t logSize;
-        HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-        if (logSize) {
-          std::string log(logSize, '\0');
-          HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-          WARN(log);
-        }
-        return 0;
-      }
-    }
-    size_t codeSize;
-    HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
-    std::vector<char> codec(codeSize);
-    HIPRTC_CHECK(hiprtcGetCode(prog, codec.data()));
-    void* kernelParam[] = {A_d};
-    auto size = sizeof(kernelParam);
-    void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
-                                HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
-                                HIP_LAUNCH_PARAM_END};
-    hipModule_t module;
-    hipFunction_t function;
-    HIP_CHECK(hipModuleLoadData(&module, codec.data()));
-    HIP_CHECK(hipModuleGetFunction(&function, module, kername));
-    HIP_CHECK(hipModuleLaunchKernel(function, N, 1, 1, N, 1, 1, 0, 0,
-                                    nullptr, kernel_parameter));
-    HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
-    for (int i = 0; i < N; i++) {
-      if (senario == 0) {
-        sum_wo += A_h[i];
-      } else {
-        sum_w += A_h[i];
-      }
-    }
-    HIP_CHECK(hipDeviceSynchronize());
-    HIP_CHECK(hipModuleUnload(module));
-    HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
-  }
-  if (sum_w != sum_tocheck) {
-    return 1;
-  } else {
-    WARN("Compiler Option : " << compiler_option);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("EXPECTED : " << sum_w << " != " << sum_tocheck);
-    return 0;
-  }
-}
-
-bool check_unsafe_atomic_disabled(const char** Combination_CO,
-                                 int Combination_CO_size, int max_thread_pos,
-                                 int fast_math_present) {
-  std::string block_name = "unsafe_atomic";
-  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  const char* compiler_option = retrieved_CO.c_str();
-  float *A_d;
-  const int N = 1000;
-  float A_h[N];
-  float Nbytes = N * sizeof(float);
-  double sum = 0, sum_tocheck = 0;
-  for (int i = 0; i < N; i++) {
-    A_h[i] = 0.1f;
-    sum_tocheck += A_h[i] + 0.2f;
-  }
-  HIP_CHECK(hipMalloc(&A_d, Nbytes));
-  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
-  hiprtcProgram prog;
-  HIPRTC_CHECK(hiprtcCreateProgram(&prog, unsafe_atomic_string,
-                                                kername, 0, NULL, NULL));
-  if (Combination_CO_size != -1) {
-      hiprtcResult compileResult{hiprtcCompileProgram(prog,
-                                                      Combination_CO_size,
-                                                      Combination_CO)};
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler Option : " << compiler_option);
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  } else {
-    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, &compiler_option)};
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler Option : " << compiler_option);
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return 0;
-    }
-  }
-  size_t codeSize;
-  HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
-  std::vector<char> codec(codeSize);
-  HIPRTC_CHECK(hiprtcGetCode(prog, codec.data()));
-  void* kernelParam[] = {A_d};
-  auto size = sizeof(kernelParam);
-  void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
-                              HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
-                              HIP_LAUNCH_PARAM_END};
-  hipModule_t module;
-  hipFunction_t function;
-  HIP_CHECK(hipModuleLoadData(&module, codec.data()));
-  HIP_CHECK(hipModuleGetFunction(&function, module, kername));
-  HIP_CHECK(hipModuleLaunchKernel(function, N, 1, 1, N, 1, 1, 0, 0,
-                                  nullptr, kernel_parameter));
-  HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
-  for (int i = 0; i < N; i++) {
-    sum += A_h[i];
-  }
-  HIP_CHECK(hipDeviceSynchronize());
-  HIP_CHECK(hipModuleUnload(module));
-  HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
-  if (sum == sum_tocheck) {
-    return 1;
-  } else {
-    WARN("Compiler Option : " << compiler_option);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("EXPECTED RESULT IS NOT OBTAINED ");
-    WARN("EXPECTED RESULT: "<< sum_tocheck);
-    WARN("OBTAINED RESULT: "<< sum);
-    return 0;
-  }
-}
-
-bool check_infinite_num_enabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "infinite_num";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string retrieved_CO = get_string_parameters("compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  int CO_IRadded_size = 3, a = 0;
-  const char** CO_IRadded = new const char*[3];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                 Combination_CO, Combination_CO_size);
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (fast_math_present != -1) {
-    if (fast_math_present == 0 && data.find("contract") != -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-     }
-      WARN("IR DOESN'T CONTAIN 'contract' ");
-      return 0;
-    }
-  } else {
-    if (data.find("ninf")!= -1) {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-     }
-      WARN("IR DOESN'T CONTAIN 'ninf' ");
-      return 0;
-    } else {
-      return 1;
-    }
-  }
-}
-
-bool check_infinite_num_disabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "infinite_num";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  int CO_IRadded_size = 3, a = 0;
-  const char** CO_IRadded = new const char*[3];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                 Combination_CO, Combination_CO_size);
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (fast_math_present != -1) {
-    if (fast_math_present == 1 && data.find("fmul fast")!= -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-     }
-      WARN("IR DOESN'T CONTAIN 'fmul fast' ");
-      return 0;
-    }
-  } else {
-    if (data.find("ninf")!= -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN 'ninf' ");
-      return 0;
-    }
-  }
-}
-
-bool check_NAN_num_enabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "NAN_num";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string retrieved_CO = get_string_parameters("compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
-    WARN(block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  int CO_IRadded_size = 3, a = 0;
-  const char** CO_IRadded = new const char*[3];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                 Combination_CO, Combination_CO_size);
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (fast_math_present!= -1) {
-    if (fast_math_present == 0 && data.find("contract")!= -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN 'contract' ");
-      return 0;
-    }
-  } else {
-    if (data.find("nnan")!= -1) {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN 'nnan' ");
-      return 0;
-    } else {
-      return 1;
-    }
-  }
-}
-
-bool check_NAN_num_disabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "NAN_num";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  int CO_IRadded_size = 3, a = 0;
-  const char** CO_IRadded = new const char*[3];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                 Combination_CO, Combination_CO_size);
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (fast_math_present!= -1) {
-    if (fast_math_present == 1 && data.find("fmul fast")!= -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN 'fmul fast' ");
-      return 0;
-    }
-  } else {
-    if (data.find("nnan")!= -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN 'nnan' ");
-      return 0;
-    }
-  }
-}
-
-bool check_finite_math_enabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "finite_math";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string retrieved_CO = get_string_parameters("compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  int CO_IRadded_size = 3, a = 0;
-  const char** CO_IRadded = new const char*[3];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                 Combination_CO, Combination_CO_size);
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (fast_math_present!= -1) {
-    if (fast_math_present == 1 && data.find("fmul fast")!= -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN 'fmul fast'");
-      return 0;
-    }
-  } else {
-    if (data.find("nnan")!= -1 && (data.find("ninf") != -1)) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN 'nnan' or 'ninf' or both ");
-      return 0;
-    }
-  }
-}
-
-bool check_finite_math_disabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "finite_math";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  int CO_IRadded_size = 3, a = 0;
-  const char** CO_IRadded = new const char*[3];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                 Combination_CO, Combination_CO_size);
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (fast_math_present!= -1) {
-    if (fast_math_present == 0 && data.find("contract")!= -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN 'contract'");
-      return 0;
-    }
-  } else {
-    if (data.find("nnan")!= -1 && (data.find("ninf") != -1)) {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR CONTAIN 'nnan' or 'ninf' or both WHICH IS NOT EXPECTED ");
-      return 0;
-    } else {
-      return 1;
-    }
-  }
-}
-
-bool check_associative_math_enabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "associative_math";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string retrieved_CO = get_string_parameters("compiler_option",
-                                                  block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  int CO_IRadded_size = 4, a = 0;
-  const char** CO_IRadded = new const char*[4];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-fno-signed-zeros";
-  CO_IRadded[2] = "-mllvm";
-  CO_IRadded[3] = "-print-after=constmerge";
-  std::string data;
-  if (Combination_CO_size != -1) {
-    int Combination_CO_IRadded_size = Combination_CO_size+1;
-    int b = 0;
-    std::vector<std::string> add_ir_forcombi(Combination_CO_size + 1, "");
-    const char** Combination_CO_IRadded =
-                                   new const char*[Combination_CO_size+1];
-    for (int i = 0; i < Combination_CO_size+1; ++i) {
-      if (i == Combination_CO_size) {
-        Combination_CO_IRadded[i] = "-fno-signed-zeros";
-        break;
-      }
-      add_ir_forcombi[i] = Combination_CO[b];
-      Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str();
-      b++;
-    }
-    data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                                      Combination_CO_IRadded,
-                                                Combination_CO_IRadded_size);
-  } else {
-    data = checking_IR(kername, CO_IRadded, CO_IRadded_size, Combination_CO,
-                                                         Combination_CO_size);
-  }
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (fast_math_present!= -1) {
-    if (fast_math_present == 1 && data.find("fmul fast")!= -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN 'fmul fast' ");
-      return 0;
-    }
-  } else {
-    if (data.find("reassoc") != -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN 'reassoc' ");
-      WARN(data);
-      return 0;
-    }
-  }
-}
-
-bool check_associative_math_disabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "associative_math";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
-                                                  block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  int CO_IRadded_size = 4, a = 0;
-  const char** CO_IRadded = new const char*[4];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-fno-signed-zeros";
-  CO_IRadded[2] = "-mllvm";
-  CO_IRadded[3] = "-print-after=constmerge";
-  std::string data;
-  if (Combination_CO_size != -1) {
-    int Combination_CO_IRadded_size = Combination_CO_size+1;
-    int b = 0;
-    std::vector<std::string> add_ir_forcombi(Combination_CO_size + 1, "");
-    const char** Combination_CO_IRadded =
-                                   new const char*[Combination_CO_size+1];
-    for (int i = 0; i < Combination_CO_size+1; ++i) {
-      if (i == Combination_CO_size) {
-        Combination_CO_IRadded[i] = "-fno-signed-zeros";
-        break;
-      }
-      add_ir_forcombi[i] = Combination_CO[b];
-      Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str();
-      b++;
-    }
-    data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                                      Combination_CO_IRadded,
-                                                Combination_CO_IRadded_size);
-  } else {
-    data = checking_IR(kername, CO_IRadded, CO_IRadded_size, Combination_CO,
-                                                         Combination_CO_size);
-  }
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (fast_math_present!= -1) {
-    if (fast_math_present == 0 && data.find("contract")!= -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN 'contract' ");
-      return 0;
-    }
-  } else {
-    if (data.find("reassoc")!= -1) {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR CONTAIN 'reassoc' WHICH IS NOT EXPECTED ");
-      return 0;
-    } else {
-      return 1;
-    }
-  }
-}
-
-bool check_signed_zeros_enabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "signed_zeros";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string retrieved_CO = get_string_parameters("compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  int CO_IRadded_size = 3, a = 0;
-  const char** CO_IRadded = new const char*[3];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                 Combination_CO, Combination_CO_size);
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (fast_math_present!= -1) {
-    if (fast_math_present == 0 && data.find("contract")!= -1) {
-      return 1;
-    } else {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR DOESN'T CONTAIN 'contract' ");
-    return 0;
-    }
-  } else {
-    if (data.find("nsz") != -1) {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR CONTAIN 'nsz' WHICH IS NOT EXPECTED ");
-      return 0;
-    } else {
-      return 1;
-    }
-  }
-}
-
-bool check_signed_zeros_disabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "signed_zeros";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  int CO_IRadded_size = 3, a = 0;
-  const char** CO_IRadded = new const char*[3];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                 Combination_CO, Combination_CO_size);
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (fast_math_present!= -1) {
-    if (fast_math_present == 1 && data.find("fmul fast")!= -1) {
-      return 1;
-    } else {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR DOESN'T CONTAIN 'fmul fast' ");
-    return 0;
-    }
-  } else {
-    if (data.find("nsz") != -1) {
-      return 1;
-    } else {
-      WARN("Compiler option : " << retrieved_CO);
-      if (Combination_CO_size != -1) {
-        WARN("FAILED IN COMBINATION :");
-        for (int i = 0; i < Combination_CO_size; i++) {
-          WARN(Combination_CO[i]);
-        }
-      }
-      WARN("IR DOESN'T CONTAIN 'nsz' ");
-      return 0;
-    }
-  }
-}
-
-bool check_trapping_math_enabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "trapping_math";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string retrieved_CO = get_string_parameters("compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  int CO_IRadded_size = 3, a = 0;
-  const char** CO_IRadded = new const char*[3];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                 Combination_CO, Combination_CO_size);
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (data.find("\"no-trapping-math\"=\"true\"") != -1) {
-    return 1;
-  } else {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR DOESN'T CONTAIN '\"no-trapping-math\"=\"true\"'");
-    return 0;
-  }
-}
-
-bool check_trapping_math_disabled(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present) {
-  std::string block_name = "trapping_math";
-  std::string kernel_name = get_string_parameters("kernel_name", block_name);
-  const char* kername = kernel_name.c_str();
-  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
-                                                 block_name);
-  if (retrieved_CO == "") {
-    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    return 0;
-  }
-  int CO_IRadded_size = 3, a = 0;
-  const char** CO_IRadded = new const char*[3];
-  CO_IRadded[0] = retrieved_CO.c_str();
-  CO_IRadded[1] = "-mllvm";
-  CO_IRadded[2] = "-print-after=constmerge";
-  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
-                                 Combination_CO, Combination_CO_size);
-  if (data == "") {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR NOT GENERATED");
-    return 0;
-  }
-  if (data.find("\"no-trapping-math\"=\"true\"") != -1) {
-    return 1;
-  } else {
-    WARN("Compiler option : " << retrieved_CO);
-    if (Combination_CO_size != -1) {
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-    }
-    WARN("IR DOESN'T CONTAIN '\"no-trapping-math\"=\"true\"'");
-    return 0;
-  }
-}
-
-std::string checking_IR(const char* kername, const char** extra_CO_IRadded,
-                    int extra_CO_IRadded_size, const char** Combination_CO,
-                    int Combination_CO_size) {
-  float *A_d, *B_d, *C_d;
-  float *A_h, *B_h, *C_h, *result;
-  float Nbytes = sizeof(float);
-  A_h = new float[1];
-  B_h = new float[1];
-  C_h = new float[1];
-  result = new float[1];
-  for (int i = 0; i < 1; i++) {
-    A_h[i] = 0.1f;
-    B_h[i] = 0.1f;
-    C_h[i] = 0.1f;
-    result[i] = 0.2f;
-  }
-  HIP_CHECK(hipMalloc(&A_d, Nbytes));
-  HIP_CHECK(hipMalloc(&B_d, Nbytes));
-  HIP_CHECK(hipMalloc(&C_d, Nbytes));
-  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
-  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
-  HIP_CHECK(hipMemcpy(C_d, C_h, Nbytes, hipMemcpyHostToDevice));
-  hiprtcProgram prog;
-  HIPRTC_CHECK(hiprtcCreateProgram(&prog, ffp_contract_string,
-                                                kername, 0, NULL, NULL));
-  int Combination_CO_IRadded_size;
-  CaptureStream capture(stderr);
-  if (Combination_CO_size != -1) {
-    Combination_CO_IRadded_size = Combination_CO_size+2;
-    int b = 0;
-    std::vector<std::string> add_ir_forcombi(Combination_CO_size + 2, "");
-    const char** Combination_CO_IRadded =
-                                  new const char*[Combination_CO_size+2];
-    for (int i = 0; i < Combination_CO_size+2; ++i) {
-      if (i == Combination_CO_size) {
-        Combination_CO_IRadded[i] = "-mllvm";
-        Combination_CO_IRadded[i+1] = "-print-after=constmerge";
-        break;
-      }
-      add_ir_forcombi[i] = Combination_CO[b];
-      Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str();
-      b++;
-    }
-    capture.Begin();
-    hiprtcResult compileResult{hiprtcCompileProgram(prog,
-                                                   Combination_CO_IRadded_size,
-                                                    Combination_CO_IRadded)};
-    capture.End();
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("Compiler option : " <<  extra_CO_IRadded[0]);
-      WARN("FAILED IN COMBINATION :");
-      for (int i = 0; i < Combination_CO_size; i++) {
-        WARN(Combination_CO[i]);
-      }
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return "";
-    }
-  } else {
-    capture.Begin();
-    hiprtcResult compileResult{hiprtcCompileProgram(prog,
-                                                    extra_CO_IRadded_size,
-                                                    extra_CO_IRadded)};
-    capture.End();
-    if (!(compileResult == HIPRTC_SUCCESS)) {
-      WARN("hiprtcCompileProgram() api failed!! with error code: ");
-      WARN(compileResult);
-      size_t logSize;
-      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
-      if (logSize) {
-        std::string log(logSize, '\0');
-        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
-        WARN(log);
-      }
-      return"";
-    }
-  }
-  size_t codeSize;
-  HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
-  std::vector<char> codec(codeSize);
-  HIPRTC_CHECK(hiprtcGetCode(prog, codec.data()));
-  void* kernelParam[] = {A_d, B_d, C_d};
-  auto size = sizeof(kernelParam);
-  void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
-                              HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
-                              HIP_LAUNCH_PARAM_END};
-  hipModule_t module;
-  hipFunction_t function;
-  HIP_CHECK(hipModuleLoadData(&module, codec.data()));
-  HIP_CHECK(hipModuleGetFunction(&function, module, kername));
-  HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr,
-                                  kernel_parameter));
-  HIP_CHECK(hipMemcpy(result, C_d, Nbytes, hipMemcpyDeviceToHost));
-  for (int i = 0; i< 1; i++) {
-    if (result[i] != ((A_h[i] * B_h[i]) + C_h[i])) {
-      return "";
-    }
-  }
-  std::string data = capture.getData();
-  std::stringstream dataStream;
-  HIP_CHECK(hipModuleUnload(module));
-  HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
-  return data;
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+This file contains functions for idividual HIPRTC supported compiler options
+validation. For PASS senario the function returns 1 or 0 otherwise.
+*/
+
+#include <hip/hiprtc.h>
+#include <hip/hip_runtime.h>
+#include <hip/hip_fp16.h>
+#include <picojson.h>
+#include <fstream>
+#include <vector>
+#include <string>
+#include <iostream>
+#include "headers/RtcUtility.h"
+#include "headers/RtcFunctions.h"
+#include "headers/RtcKernels.h"
+#include <hip_test_common.hh>
+#include "headers/printf_common.h"
+
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+bool check_architecture(const char** Combination_CO,
+                        int Combination_CO_size, int max_thread_pos,
+                        int fast_math_present) {
+  std::string block_name = "architecture";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string retrieved_CO = get_string_parameters("compiler_option",
+                                                   block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  hipDeviceProp_t prop;
+  HIP_CHECK(hipGetDeviceProperties(&prop, 0));
+  std::string actual_architecture = prop.gcnArchName;
+  std::string complete_CO = retrieved_CO + actual_architecture;
+  const char* compiler_option = complete_CO.c_str();
+  hiprtcProgram prog;
+  HIPRTC_CHECK(hiprtcCreateProgram(&prog, max_thread_string,
+                                           kername, 0, NULL, NULL));
+  if (Combination_CO_size != -1) {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog,
+                                                    Combination_CO_size,
+                                                    Combination_CO)};
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler Option : " << compiler_option);
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  } else {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
+                                                   &compiler_option)};
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler Option : " << compiler_option);
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  }
+  return 1;
+}
+
+bool check_rdc(const char** Combination_CO, int Combination_CO_size,
+               int max_thread_pos, int fast_math_present) {
+  std::string block_name = "rdc";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string CO = get_string_parameters("compiler_option",
+                                                  block_name);
+  if (CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  const char* compiler_opt = CO.c_str();
+  float *A_d, *B_d, *C_d;
+  float *A_h, *B_h, *C_h, *result;
+  float Nbytes = sizeof(float);
+  A_h = new float[1];
+  B_h = new float[1];
+  C_h = new float[1];
+  result = new float[1];
+  for (int i = 0; i < 1; i++) {
+    A_h[i] = 4;
+    B_h[i] = 4;
+    result[i] = 16;
+  }
+  HIP_CHECK(hipMalloc(&A_d, Nbytes));
+  HIP_CHECK(hipMalloc(&B_d, Nbytes));
+  HIP_CHECK(hipMalloc(&C_d, Nbytes));
+  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
+  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
+  hiprtcProgram prog;
+  HIPRTC_CHECK(hiprtcCreateProgram(&prog, rdc_string, kername, 0, NULL, NULL));
+  if (Combination_CO_size != -1) {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
+                                                    Combination_CO)};
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler Option : " << compiler_opt);
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  } else {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, &compiler_opt)};
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler Option : " << compiler_opt);
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  }
+  void* kernelParam[] = {A_d, B_d, C_d};
+  auto size = sizeof(kernelParam);
+  void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
+                              HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
+                              HIP_LAUNCH_PARAM_END};
+  size_t codeSize;
+  HIPRTC_CHECK(hiprtcGetBitcodeSize(prog, &codeSize));
+  std::vector<char> codec(codeSize);
+  HIPRTC_CHECK(hiprtcGetBitcode(prog, codec.data()));
+  float wall_time;
+  int reg_count = 2;
+  int max_thread = 1;
+  unsigned int log_size = 5120;
+  char error_log[5120];
+  char info_log[5120];
+  std::vector<hiprtcJIT_option> jit_options = {HIPRTC_JIT_MAX_REGISTERS,
+                                               HIPRTC_JIT_THREADS_PER_BLOCK,
+                                               HIPRTC_JIT_WALL_TIME,
+                                               HIPRTC_JIT_INFO_LOG_BUFFER,
+                                       HIPRTC_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
+                                               HIPRTC_JIT_ERROR_LOG_BUFFER,
+                                       HIPRTC_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,
+                                               HIPRTC_JIT_LOG_VERBOSE};
+  const void* lopts[] = {reinterpret_cast<void*>(&reg_count),
+                         reinterpret_cast<void*>(&max_thread),
+                         reinterpret_cast<void*>(&wall_time),
+                         info_log,
+                         reinterpret_cast<void*>(log_size),
+                         error_log,
+                         reinterpret_cast<void*>(log_size),
+                         reinterpret_cast<void*>(1)};
+  hiprtcLinkState rtc_link_state;
+  void* binary;
+  size_t binarySize;
+  int pass_count = 0;
+  hipModule_t module;
+  hipFunction_t function;
+  for (int i = 0; i < 2; i++) {
+    switch (i) {
+      case 0 :
+        HIPRTC_CHECK(hiprtcLinkCreate(0, nullptr, nullptr, &rtc_link_state));
+        HIPRTC_CHECK(hiprtcLinkAddData(rtc_link_state,
+                                       HIPRTC_JIT_INPUT_LLVM_BITCODE,
+                                       codec.data(), codeSize, 0, 0, 0, 0));
+        HIPRTC_CHECK(hiprtcLinkComplete(rtc_link_state, &binary, &binarySize));
+        HIP_CHECK(hipModuleLoadData(&module, binary));
+        HIP_CHECK(hipModuleGetFunction(&function, module, kername));
+        HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0,
+                                        nullptr, kernel_parameter));
+        pass_count++;
+        break;
+      case 1 :
+        HIPRTC_CHECK(hiprtcLinkCreate(8, jit_options.data(),
+                                      reinterpret_cast<void**>(&lopts),
+                                      &rtc_link_state));
+        HIPRTC_CHECK(hiprtcLinkAddData(rtc_link_state,
+                                       HIPRTC_JIT_INPUT_LLVM_BITCODE,
+                                       codec.data(), codeSize, 0, 0, 0, 0));
+        HIPRTC_CHECK(hiprtcLinkComplete(rtc_link_state, &binary, &binarySize));
+        HIP_CHECK(hipModuleLoadData(&module, binary));
+        HIP_CHECK(hipModuleGetFunction(&function, module, kername));
+        HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0,
+                                        nullptr, kernel_parameter));
+        pass_count++;
+        break;
+      default:
+        WARN(" NOT VALID INPUT ");
+        break;
+    }
+  }
+  HIP_CHECK(hipMemcpy(result, C_d, Nbytes, hipMemcpyDeviceToHost));
+  for (int i = 0 ; i< 1; i++) {
+    if (result[i] != ((A_h[i] * B_h[i]))) {
+      WARN("Compiler Option : " << compiler_opt);
+      WARN("EXPECTED RESULT DOES NOT MATCH ");
+      WARN("INPUT A & B : " << A_h[i] <<" , "<< B_h[i]);
+      WARN("EXPECTED RES : " << (A_h[i] * B_h[i]));
+      WARN("OBTAINED RES : " << result[i]);
+      return 0;
+    }
+  }
+  if (pass_count == 2) {
+    return 1;
+  } else {
+    WARN(" pass_count IS NOT MATCHING ");
+    return 0;
+  }
+}
+
+bool check_denormals_enabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "denormals";
+  std::string retrieved_CO = get_string_parameters("compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  picojson::array Input_Vals = get_array_parameters("Input_Vals", block_name);
+  picojson::array Expected_Results = get_array_parameters("Expected_Results",
+                                                          block_name);
+  const char* kername = kernel_name.c_str();
+  const char* compiler_option = retrieved_CO.c_str();
+  std::vector<double> double_vec_input;
+  for (auto& indx : Input_Vals) {
+    double_vec_input.push_back(indx.get<double>());
+  }
+  std::vector<int> Input_Vals_int;
+  for (auto& indx : double_vec_input) {
+    Input_Vals_int.push_back(static_cast<int>(indx));
+  }
+  std::vector<double> double_vec_expected;
+  for (auto& indx : Expected_Results) {
+    double_vec_expected.push_back(indx.get<double>());
+  }
+  std::vector<int> Expected_Results_int;
+  for (auto& indx : double_vec_expected) {
+    Expected_Results_int.push_back(static_cast<int>(indx));
+  }
+  int test_case, res_inc;
+  for (test_case = 0, res_inc = 0; test_case < Input_Vals_int.size() &&
+       res_inc < Expected_Results_int.size(); test_case+=2, res_inc++) {
+    double *base_h, *power_h, *result_h;
+    double *base_d, *power_d, *result_d;
+    double Nbytes = sizeof(double);
+    base_h = new double[1];
+    power_h = new double[1];
+    result_h = new double[1];
+    *base_h = Input_Vals_int[test_case];
+    *power_h = Input_Vals_int[test_case+1];
+    *result_h = 1;
+    HIP_CHECK(hipMalloc(&base_d, Nbytes));
+    HIP_CHECK(hipMalloc(&power_d, Nbytes));
+    HIP_CHECK(hipMalloc(&result_d, Nbytes));
+    HIP_CHECK(hipMemcpy(base_d, base_h, Nbytes, hipMemcpyHostToDevice));
+    HIP_CHECK(hipMemcpy(power_d, power_h, Nbytes, hipMemcpyHostToDevice));
+    HIP_CHECK(hipMemcpy(result_d, result_h, Nbytes, hipMemcpyHostToDevice));
+    hiprtcProgram program;
+    HIPRTC_CHECK(hiprtcCreateProgram(&program, denormals_string,
+                                                  "denormals", 0, NULL, NULL));
+    if (Combination_CO_size != -1) {
+      hiprtcResult compileResult{hiprtcCompileProgram(program,
+                                                      Combination_CO_size,
+                                                      Combination_CO)};
+      if (!(compileResult == HIPRTC_SUCCESS)) {
+        WARN("Compiler Option : " << compiler_option);
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+        WARN("hiprtcCompileProgram() api failed!! with error code: ");
+        WARN(compileResult);
+        size_t logSize;
+        HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize));
+        if (logSize) {
+          std::string log(logSize, '\0');
+          HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0]));
+          WARN(log);
+        }
+        return 0;
+      }
+    } else {
+      hiprtcResult compileResult{hiprtcCompileProgram(program, 1,
+                                                    &compiler_option)};
+      if (!(compileResult == HIPRTC_SUCCESS)) {
+        WARN("Compiler Option : " << compiler_option);
+        WARN("hiprtcCompileProgram() api failed!! with error code: ");
+        WARN(compileResult);
+        size_t logSize;
+        HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize));
+        if (logSize) {
+          std::string log(logSize, '\0');
+          HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0]));
+          WARN(log);
+        }
+        return 0;
+      }
+    }
+    size_t codeSize;
+    HIPRTC_CHECK(hiprtcGetCodeSize(program, &codeSize));
+    std::vector<char> codec(codeSize);
+    HIPRTC_CHECK(hiprtcGetCode(program, codec.data()));
+    void* kernelParam[] = {base_d, power_d, result_d};
+    auto size = sizeof(kernelParam);
+    void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
+                        HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
+                        HIP_LAUNCH_PARAM_END};
+    hipModule_t module;
+    hipFunction_t function;
+    HIP_CHECK(hipModuleLoadData(&module, codec.data()));
+    HIP_CHECK(hipModuleGetFunction(&function, module, kername));
+    HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0,
+                                              nullptr, kernel_parameter));
+    HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(double),
+                        hipMemcpyDeviceToHost));
+    HIP_CHECK(hipDeviceSynchronize());
+    HIP_CHECK(hipModuleUnload(module));
+    HIPRTC_CHECK(hiprtcDestroyProgram(&program));
+    if (*result_h != Expected_Results_int[res_inc]) {
+      WARN("Compiler Option : " << compiler_option);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("EXPECTED RESULT DOES NOT MATCH FOR " << res_inc);
+      WARN("th ITERATION (start iteration is 0 ) ");
+      WARN("INPUT : pow(2, " << *power_h << ") ");
+      WARN("EXPECTED OP: " << Expected_Results_int[res_inc]);
+      WARN("OBTAINED OP: " << *result_h);
+      return 0;
+    }
+  }
+  return 1;
+}
+
+bool check_denormals_disabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "denormals";
+  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  picojson::array Input_Vals = get_array_parameters("Input_Vals", block_name);
+  picojson::array Expected_Results_for_no = get_array_parameters(
+                                        "Expected_Results_for_no", block_name);
+  const char* kername = kernel_name.c_str();
+  const char* compiler_option = retrieved_CO.c_str();
+  std::vector<double> double_vec_input;
+  for (auto& indx : Input_Vals) {
+    double_vec_input.push_back(indx.get<double>());
+  }
+  std::vector<int> Input_Vals_int;
+  for (auto& indx : double_vec_input) {
+    Input_Vals_int.push_back(static_cast<int>(indx));
+  }
+  std::vector<double> double_vec_expected_for_no;
+  for (auto& indx : Expected_Results_for_no) {
+    double_vec_expected_for_no.push_back(indx.get<double>());
+  }
+  std::vector<int> Expected_Results_for_no_int;
+  for (auto& indx : double_vec_expected_for_no) {
+    Expected_Results_for_no_int.push_back(static_cast<int>(indx));
+  }
+  int test_case, res_inc;
+  for (test_case = 0, res_inc = 0; test_case < Input_Vals_int.size() &&
+       res_inc < Expected_Results_for_no_int.size(); test_case+=2, res_inc++) {
+    double *base_h, *power_h, *result_h;
+    double *base_d, *power_d, *result_d;
+    double Nbytes = sizeof(double);
+    base_h = new double[1];
+    power_h = new double[1];
+    result_h = new double[1];
+    *base_h = Input_Vals_int[test_case];
+    *power_h = Input_Vals_int[test_case+1];
+    *result_h = 0;
+    HIP_CHECK(hipMalloc(&base_d, Nbytes));
+    HIP_CHECK(hipMalloc(&power_d, Nbytes));
+    HIP_CHECK(hipMalloc(&result_d, Nbytes));
+    HIP_CHECK(hipMemcpy(base_d, base_h, Nbytes, hipMemcpyHostToDevice));
+    HIP_CHECK(hipMemcpy(power_d, power_h, Nbytes, hipMemcpyHostToDevice));
+    HIP_CHECK(hipMemcpy(result_d, result_h, Nbytes, hipMemcpyHostToDevice));
+    hiprtcProgram program;
+    HIPRTC_CHECK(hiprtcCreateProgram(&program, denormals_string,
+                                                  "denormals", 0, NULL, NULL));
+    if (Combination_CO_size != -1) {
+      hiprtcResult compileResult{hiprtcCompileProgram(program,
+                                                      Combination_CO_size,
+                                                      Combination_CO)};
+      if (!(compileResult == HIPRTC_SUCCESS)) {
+        WARN("Compiler Option : " << compiler_option);
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+        WARN("hiprtcCompileProgram() api failed!! with error code: ");
+        WARN(compileResult);
+        size_t logSize;
+        HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize));
+        if (logSize) {
+          std::string log(logSize, '\0');
+          HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0]));
+          WARN(log);
+        }
+        return 0;
+      }
+    } else {
+      hiprtcResult compileResult{hiprtcCompileProgram(program, 1,
+                                                    &compiler_option)};
+      if (!(compileResult == HIPRTC_SUCCESS)) {
+        WARN("Compiler Option : " << compiler_option);
+        WARN("hiprtcCompileProgram() api failed!! with error code: ");
+        WARN(compileResult);
+        size_t logSize;
+        HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &logSize));
+        if (logSize) {
+          std::string log(logSize, '\0');
+          HIPRTC_CHECK(hiprtcGetProgramLog(program, &log[0]));
+          WARN(log);
+        }
+        return 0;
+      }
+    }
+    size_t codeSize;
+    HIPRTC_CHECK(hiprtcGetCodeSize(program, &codeSize));
+    std::vector<char> codec(codeSize);
+    HIPRTC_CHECK(hiprtcGetCode(program, codec.data()));
+    void* kernelParam[] = {base_d, power_d, result_d};
+    auto size = sizeof(kernelParam);
+    void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
+                        HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
+                        HIP_LAUNCH_PARAM_END};
+    hipModule_t module;
+    hipFunction_t function;
+    HIP_CHECK(hipModuleLoadData(&module, codec.data()));
+    HIP_CHECK(hipModuleGetFunction(&function, module, kername));
+    HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0,
+                                              nullptr, kernel_parameter));
+    HIP_CHECK(hipMemcpy(result_h, result_d, sizeof(double),
+                        hipMemcpyDeviceToHost));
+    HIP_CHECK(hipDeviceSynchronize());
+    HIP_CHECK(hipModuleUnload(module));
+    HIPRTC_CHECK(hiprtcDestroyProgram(&program));
+    if (*result_h != Expected_Results_for_no_int[res_inc]) {
+      WARN("Compiler Option : " << compiler_option);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("EXPECTED RESULT DOES NOT MATCH FOR " << res_inc);
+      WARN("th ITERATION (start iteration is 0 ) ");
+      WARN("INPUT : pow(2, " << *power_h << ") ");
+      WARN("EXPECTED OP: "<< Expected_Results_for_no_int[res_inc]);
+      WARN("OBTAINED OP: "<< *result_h);
+      return 0;
+    }
+  }
+  return 1;
+}
+
+bool check_ffp_contract_off(const char** Combination_CO,
+                           int Combination_CO_size, int max_thread_pos,
+                           int fast_math_present) {
+  std::string block_name = "ffp_contract";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  picojson::array retrieved_CO = get_array_parameters("compiler_option",
+                                                          block_name);
+  if (retrieved_CO.size() < 3) {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::vector<std::string> CO_vec;
+  for (auto& indx : retrieved_CO) {
+    CO_vec.push_back(indx.get<std::string>());
+  }
+  int CO_IRadded_size = 3;
+  const char** CO_IRadded = new const char*[3];
+  std::string hold = CO_vec[0];
+  CO_IRadded[0] = hold.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                 Combination_CO, Combination_CO_size);
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO[0]);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (data.find("fmul contract") != -1 &&
+      data.find("@llvm.fmuladd.f32") != -1) {
+    WARN("Compiler option : " << retrieved_CO[0]);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR CONTAIN EITHER");
+    WARN("'fmul contract' or '@llvm.fmuladd.f32' or both ");
+    WARN("WHICH IS NOT EXPECTED");
+    return 0;
+  } else {
+    return 1;
+  }
+}
+
+bool check_ffp_contract_on(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "ffp_contract";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  picojson::array retrieved_CO = get_array_parameters("compiler_option",
+                                                          block_name);
+  if (retrieved_CO.size() < 3) {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::vector<std::string> CO_vec;
+  for (auto& indx : retrieved_CO) {
+    CO_vec.push_back(indx.get<std::string>());
+  }
+  int CO_IRadded_size = 3;
+  const char** CO_IRadded = new const char*[3];
+  std::string hold = CO_vec[1];
+  CO_IRadded[0] = hold.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  std::string data = checking_IR(kername, CO_IRadded,
+                                 CO_IRadded_size, Combination_CO,
+                                 Combination_CO_size);
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO[1]);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (fast_math_present!= -1) {
+    if (fast_math_present == 0 && data.find("@llvm.fmuladd.f32")!= -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO[1]);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN '@llvm.fmuladd.f32' ");
+      return 0;
+    }
+  } else {
+    if (data.find("@llvm.fmuladd.f32") != -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO[1]);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN '@llvm.fmuladd.f32' ");
+      return 0;
+    }
+  }
+}
+
+bool check_ffp_contract_fast(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "ffp_contract";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  picojson::array retrieved_CO = get_array_parameters("compiler_option",
+                                                          block_name);
+  if (retrieved_CO.size() < 3) {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::vector<std::string> CO_vec;
+  for (auto& indx : retrieved_CO) {
+    CO_vec.push_back(indx.get<std::string>());
+  }
+  int CO_IRadded_size = 3;
+  const char** CO_IRadded = new const char*[3];
+  std::string hold = CO_vec[2];
+  CO_IRadded[0] = hold.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                 Combination_CO, Combination_CO_size);
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO[2]);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (fast_math_present!= -1) {
+    if (fast_math_present == 1 && data.find("contract")!= -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO[2]);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN 'fmul contract' ");
+      return 0;
+    }
+  } else {
+    if (data.find("fmul contract") != -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO[2]);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN 'fmul contract' ");
+      return 0;
+    }
+  }
+}
+
+bool check_fast_math_enabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "fast_math";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string retrieved_CO = get_string_parameters("compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  int CO_IRadded_size = 3;
+  const char** CO_IRadded = new const char*[3];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                 Combination_CO, Combination_CO_size);
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (data.find("fmul fast")!= -1) {
+    return 1;
+  } else {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR DOESN'T CONTAIN 'fmul fast' ");
+    return 0;
+  }
+}
+
+bool check_fast_math_disabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "fast_math";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  int CO_IRadded_size = 3;
+  const char** CO_IRadded = new const char*[3];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                 Combination_CO, Combination_CO_size);
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (data.find("fmul fast")!= -1) {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR DOESN'T CONTAIN 'fmul fast' ");
+    return 0;
+  } else {
+    return 1;
+  }
+}
+
+bool check_slp_vectorize_enabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "slp_vectorize";
+  std::string retrieved_CO = get_string_parameters("compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  int CO_IRadded_size = 3;
+  const char** CO_IRadded = new const char*[3];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  __half2 *a_d, *x_d, *y_d;
+  __half2 a_h, x_h;
+  a_h.data.x = 1.5;
+  x_h.data.y = 3.0;
+  CaptureStream capture(stderr);
+  HIP_CHECK(hipMalloc(&a_d, sizeof(__half2)));
+  HIP_CHECK(hipMalloc(&x_d, sizeof(__half2)));
+  HIP_CHECK(hipMalloc(&y_d, sizeof(__half2)));
+  HIP_CHECK(hipMemcpy(a_d, &a_h, sizeof(__half2), hipMemcpyHostToDevice));
+  HIP_CHECK(hipMemcpy(x_d, &x_h, sizeof(__half2), hipMemcpyHostToDevice));
+  hiprtcProgram prog;
+  HIPRTC_CHECK(hiprtcCreateProgram(&prog, slp_vectorize_string,
+                                                kername, 0, NULL, NULL));
+  if (Combination_CO_size != -1) {
+    int Combination_CO_IRadded_size = Combination_CO_size+3;
+    int b = 0;
+    std::vector<std::string> add_ir_forcombi(Combination_CO_size + 3, "");
+    const char** Combination_CO_IRadded =
+                                       new const char*[Combination_CO_size+3];
+    for (int i = 0; i < Combination_CO_size+3; ++i) {
+      if (i == Combination_CO_size) {
+        Combination_CO_IRadded[i] = "-fno-signed-zeros";
+        Combination_CO_IRadded[i+1] = "-mllvm";
+        Combination_CO_IRadded[i+2] = "-print-after=constmerge";
+        break;
+      }
+      add_ir_forcombi[i] = Combination_CO[b];
+      Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str();
+      b++;
+    }
+    capture.Begin();
+    hiprtcResult compileResult{hiprtcCompileProgram(prog,
+                                               Combination_CO_IRadded_size,
+                                               Combination_CO_IRadded)};
+    capture.End();
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler option : " << retrieved_CO);
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size+3; i++) {
+        WARN(Combination_CO_IRadded[i]);
+      }
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  } else {
+    capture.Begin();
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, CO_IRadded_size,
+                                               CO_IRadded)};
+    capture.End();
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler option : " << retrieved_CO);
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  }
+  std::string data = capture.getData();
+  std::stringstream dataStream;
+  size_t codeSize;
+  HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
+  std::vector<char> codec(codeSize);
+  HIPRTC_CHECK(hiprtcGetCode(prog, codec.data()));
+  void* kernelParam[] = {reinterpret_cast<void*>(a_d),
+                         reinterpret_cast<void*>(x_d),
+                         reinterpret_cast<void*>(y_d)};
+  auto size = sizeof(kernelParam);
+  void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
+                              HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
+                              HIP_LAUNCH_PARAM_END};
+  hipModule_t module;
+  hipFunction_t function;
+  HIP_CHECK(hipModuleLoadData(&module, codec.data()));
+  HIP_CHECK(hipModuleGetFunction(&function, module, kername));
+  HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr,
+                                  kernel_parameter));
+  HIP_CHECK(hipDeviceSynchronize());
+  HIP_CHECK(hipModuleUnload(module));
+  HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  int times = 0;
+  if (data.find("contract <2 x half>", 0) != -1) {
+    times++;
+  }
+  int start = data.find("contract <2 x half>", 0) + 1;
+  while (data.find("contract <2 x half>", start) != -1) {
+    times++;
+    start = data.find("contract <2 x half>", start)+1;
+  }
+  if (times == 1) {
+    return 1;
+  } else if (times == 0) {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR DOESN'T CONTAIN 'fadd contract <2 x half>' ");
+    return 0;
+  } else {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR CONTAIN 'fadd contract <2 x half>' " << times << "times");
+    WARN(" WHICH IS NOT EXPECTED (IT SHOULD BE PRESENT ONCE)");
+    return 0;
+  }
+}
+
+bool check_slp_vectorize_disabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "slp_vectorize";
+  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  int CO_IRadded_size = 3;
+  const char** CO_IRadded = new const char*[3];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  __half2 *a_d, *x_d, *y_d;
+  __half2 a_h, x_h;
+  a_h.data.x = 1.5;
+  x_h.data.y = 3.0;
+  CaptureStream capture(stderr);
+  HIP_CHECK(hipMalloc(&a_d, sizeof(__half2)));
+  HIP_CHECK(hipMalloc(&x_d, sizeof(__half2)));
+  HIP_CHECK(hipMalloc(&y_d, sizeof(__half2)));
+  HIP_CHECK(hipMemcpy(a_d, &a_h, sizeof(__half2), hipMemcpyHostToDevice));
+  HIP_CHECK(hipMemcpy(x_d, &x_h, sizeof(__half2), hipMemcpyHostToDevice));
+  hiprtcProgram prog;
+  HIPRTC_CHECK(hiprtcCreateProgram(&prog, slp_vectorize_string,
+                                                kername, 0, NULL, NULL));
+  if (Combination_CO_size != -1) {
+    int Combination_CO_IRadded_size = Combination_CO_size+3;
+    int b = 0;
+    std::vector<std::string> add_ir_forcombi(Combination_CO_size + 3, "");
+    const char** Combination_CO_IRadded =
+                                       new const char*[Combination_CO_size+3];
+    for (int i = 0; i < Combination_CO_size+3; ++i) {
+      if (i == Combination_CO_size) {
+        Combination_CO_IRadded[i] = "-fno-signed-zeros";
+        Combination_CO_IRadded[i+1] = "-mllvm";
+        Combination_CO_IRadded[i+2] = "-print-after=constmerge";
+        break;
+      }
+      add_ir_forcombi[i] = Combination_CO[b];
+      Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str();
+      b++;
+    }
+    capture.Begin();
+    hiprtcResult compileResult{hiprtcCompileProgram(prog,
+                                               Combination_CO_IRadded_size,
+                                               Combination_CO_IRadded)};
+    capture.End();
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler option : " << retrieved_CO);
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size+3; i++) {
+        WARN(Combination_CO_IRadded[i]);
+      }
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  } else {
+    capture.Begin();
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, CO_IRadded_size,
+                                               CO_IRadded)};
+    capture.End();
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler option : " << retrieved_CO);
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  }
+  std::string data = capture.getData();
+  std::stringstream dataStream;
+  size_t codeSize;
+  HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
+  std::vector<char> codec(codeSize);
+  HIPRTC_CHECK(hiprtcGetCode(prog, codec.data()));
+  void* kernelParam[] = {reinterpret_cast<void*>(a_d),
+                         reinterpret_cast<void*>(x_d),
+                         reinterpret_cast<void*>(y_d)};
+  auto size = sizeof(kernelParam);
+  void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
+                              HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
+                              HIP_LAUNCH_PARAM_END};
+  hipModule_t module;
+  hipFunction_t function;
+  HIP_CHECK(hipModuleLoadData(&module, codec.data()));
+  HIP_CHECK(hipModuleGetFunction(&function, module, kername));
+  HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr,
+                                  kernel_parameter));
+  HIP_CHECK(hipDeviceSynchronize());
+  HIP_CHECK(hipModuleUnload(module));
+  HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
+  int times = 0;
+  if (data.find("contract <2 x half>", 0) != -1) {
+    times++;
+  }
+  int start = data.find("contract <2 x half>", 0) + 1;
+  while (data.find("contract <2 x half>", start) != -1) {
+    times++;
+    start = data.find("contract <2 x half>", start)+1;
+  }
+  if (times == 2) {
+    return 1;
+  } else if (times < 2) {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR CONTAIN 'fadd contract <2 x half>' " << times << "times");
+    WARN(" WHICH IS NOT EXPECTED(IT SHOULD BE PRESENT TWICE)");
+    return 0;
+  } else {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR CONTAIN 'fadd contract <2 x half>' " << times << "times");
+    WARN(" WHICH IS NOT EXPECTED(IT SHOULD BE PRESENT TWICE)");
+    return 0;
+  }
+}
+
+bool check_macro(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "macro";
+  std::string retrieved_CO = get_string_parameters("compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  picojson::array Expected_Results = get_array_parameters("Expected_Results",
+                                                           block_name);
+  const char* kername = kernel_name.c_str();
+  std::vector<double> double_vec_expected;
+  for (auto& indx : Expected_Results) {
+    double_vec_expected.push_back(indx.get<double>());
+  }
+  std::vector<int> Expected_Results_int;
+  for (auto& indx : double_vec_expected) {
+    Expected_Results_int.push_back(static_cast<int>(indx));
+  }
+  const char* compiler_option = retrieved_CO.c_str();
+  hiprtcProgram prog;
+  HIPRTC_CHECK(hiprtcCreateProgram(&prog, macro_string,
+                                                kername, 0, NULL, NULL));
+  if (Combination_CO_size != -1) {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
+                                                    Combination_CO)};
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler Option : " << compiler_option);
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  } else {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
+                                                   &compiler_option)};
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler Option : " << compiler_option);
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  }
+  int *macro_value_h;
+  int *macro_value_d;
+  macro_value_h = new int[1];
+  HIP_CHECK(hipMalloc(&macro_value_d, sizeof(int)));
+  *macro_value_h = 0;
+  HIP_CHECK(hipMemcpy(macro_value_d, macro_value_h, sizeof(int),
+            hipMemcpyHostToDevice));
+  size_t codeSize;
+  HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
+  std::vector<char> codec(codeSize);
+  hiprtcGetCode(prog, codec.data());
+  void* kernelParam[] = {macro_value_d};
+  auto size = sizeof(kernelParam);
+  void* kernel_parameter[]={HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
+                            HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
+                            HIP_LAUNCH_PARAM_END};
+  hipModule_t module;
+  hipFunction_t function;
+  HIP_CHECK(hipModuleLoadData(&module, codec.data()));
+  HIP_CHECK(hipModuleGetFunction(&function, module, kername));
+  HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr,
+                        kernel_parameter));
+  HIP_CHECK(hipMemcpy(macro_value_h, macro_value_d, sizeof(int),
+                      hipMemcpyDeviceToHost));
+  HIP_CHECK(hipDeviceSynchronize());
+  HIP_CHECK(hipModuleUnload(module));
+  HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
+  if (*macro_value_h != Expected_Results_int[0]) {
+    WARN("Compiler Option : " << compiler_option);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("EXPECTED RESULT DOES NOT MATCH");
+    WARN("INPUT: " << compiler_option);
+    WARN("EXPECTED OP : "<< Expected_Results_int[0]);
+    WARN("OBTAINED OP: "<< *macro_value_h);
+    return 0;
+  } else {
+    return 1;
+  }
+}
+
+bool check_undef_macro(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "undef_macro";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  picojson::array comp_opt = get_array_parameters("compiler_option",
+                                                  block_name);
+  if (comp_opt.size() < 2) {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::vector<std::string> compiler_option;
+  for (auto& indx : comp_opt) {
+    compiler_option.push_back(indx.get<std::string>());
+  }
+  std::vector<std::string> variable(compiler_option.size(), "");
+  const char** appended_compiler_options =
+                                     new const char*[compiler_option.size()];
+  for (int i = 0; i < compiler_option.size(); ++i) {
+    variable[i] = compiler_option[i];
+    appended_compiler_options[i] = variable[i].c_str();
+  }
+  hiprtcProgram prog;
+  HIPRTC_CHECK(hiprtcCreateProgram(&prog, undef_macro_string,
+                                                 kername, 0, NULL, NULL));
+  if (Combination_CO_size != -1) {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
+                                                    Combination_CO)};
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        if (log.find("undeclared identifier")) {
+          return 1;
+        }
+      } else {
+        WARN("Compiler Option : " << appended_compiler_options[1]);
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+        WARN("Expected error : 'undeclared identifier' NOT GENERATED");
+        return 0;
+      }
+    }
+  } else {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog,
+                                                    compiler_option.size(),
+                                                 appended_compiler_options)};
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        if (log.find("undeclared identifier")) {
+          return 1;
+        }
+      } else {
+        WARN("Compiler Option : " << appended_compiler_options[0]);
+        if (Combination_CO_size != -1) {
+          WARN("FAILED IN COMBINATION :");
+          for (int i = 0; i < Combination_CO_size; i++) {
+            WARN(Combination_CO[i]);
+          }
+        }
+        WARN("Expected error : 'undeclared identifier' NOT GENERATED");
+        return 0;
+      }
+    }
+  }
+  WARN("Compiler Option : " << appended_compiler_options[0]);
+  if (Combination_CO_size != -1) {
+    WARN("FAILED IN COMBINATION :");
+    for (int i = 0; i < Combination_CO_size; i++) {
+      WARN(Combination_CO[i]);
+    }
+  }
+  WARN("EXPECTED ERROR WAS NOT GENERATED");
+  return 0;
+}
+
+bool check_header_dir(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "header_dir";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string compiler_option = get_string_parameters("compiler_option",
+                                                 block_name);
+  if (compiler_option == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  picojson::array Headers = get_array_parameters("Headers", block_name);
+  picojson::array depending_comp_optn =
+                     get_array_parameters("depending_comp_optn", block_name);
+  picojson::array Src_headers =
+                             get_array_parameters("Src_headers", block_name);
+  picojson::array Input_Thrd_Vals =
+                              get_array_parameters("Input_Vals", block_name);
+  picojson::array Expected_Results =
+                        get_array_parameters("Expected_Results", block_name);
+  std::string str = "pwd";
+  const char *cmd = str.c_str();
+  CaptureStream capture(stdout);
+  capture.Begin();
+  system(cmd);
+  capture.End();
+  std::string wor_dir = capture.getData();
+  std::string break_dir = wor_dir.substr(0, wor_dir.find("build"));
+  std::string append_str = "catch/unit/rtc/headers";
+  std::string CO = compiler_option + " " + break_dir + append_str;
+  const char* appended_CO = CO.c_str();
+  std::vector<std::string> Headers_list;
+  for (auto& indx : Headers) {
+    Headers_list.push_back(indx.get<std::string>());
+  }
+  std::vector<std::string> Src_headers_list;
+  for (auto& indx : Src_headers) {
+    Src_headers_list.push_back(indx.get<std::string>());
+  }
+  std::vector<std::string> depending_co_list;
+  for (auto& indx : depending_comp_optn) {
+    depending_co_list.push_back(indx.get<std::string>());
+  }
+  std::vector<double> double_vec_target;
+  for (auto& indx : Input_Thrd_Vals) {
+    double_vec_target.push_back(indx.get<double>());
+  }
+  std::vector<int> Input_Thrd_Vals_int;
+  for (auto& indx : double_vec_target) {
+    Input_Thrd_Vals_int.push_back(static_cast<int>(indx));
+  }
+  std::vector<double> double_vec_expected;
+  for (auto& indx : Expected_Results) {
+    double_vec_expected.push_back(indx.get<double>());
+  }
+  std::vector<int> Expected_Results_int;
+  for (auto& indx : double_vec_expected) {
+    Expected_Results_int.push_back(static_cast<int>(indx));
+  }
+  std::vector<std::string> src_var_hdr_lst(Src_headers_list.size(), "");
+  const char** src_hder_lst = new const char*[Src_headers_list.size()];
+  for (int i = 0; i < Src_headers_list.size(); ++i) {
+    src_var_hdr_lst[i] = Src_headers_list[i];
+    src_hder_lst[i] = src_var_hdr_lst[i].c_str();
+  }
+  std::vector<std::string> var_hdr_lst(Headers_list.size(), "");
+  const char** hder_lst = new const char*[Headers_list.size()];
+  for (int i = 0; i < Headers_list.size(); ++i) {
+    var_hdr_lst[i] = Headers_list[i];
+    hder_lst[i] = var_hdr_lst[i].c_str();
+  }
+  for (int senario = 0; senario< Input_Thrd_Vals_int.size(); senario++) {
+    hiprtcProgram prog;
+    HIPRTC_CHECK(hiprtcCreateProgram(&prog, header_dir_string,
+                                                  kername, Headers_list.size(),
+                                                  src_hder_lst, hder_lst));
+    if (Combination_CO_size != -1) {
+      hiprtcResult compileResult{hiprtcCompileProgram(prog,
+                                                      Combination_CO_size,
+                                                      Combination_CO)};
+      if (!(compileResult == HIPRTC_SUCCESS)) {
+        WARN("Compiler Option : " << appended_CO);
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+        WARN("hiprtcCompileProgram() api failed!! with error code: ");
+        WARN(compileResult);
+        size_t logSize;
+        HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+        if (logSize) {
+          std::string log(logSize, '\0');
+          HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+          WARN(log);
+        }
+        return 0;
+      }
+    } else {
+      hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
+                                                      &appended_CO)};
+      if (!(compileResult == HIPRTC_SUCCESS)) {
+        WARN("Compiler Option : " << appended_CO);
+        WARN("hiprtcCompileProgram() api failed!! with error code: ");
+        WARN(compileResult);
+        size_t logSize;
+        HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+        if (logSize) {
+          std::string log(logSize, '\0');
+          HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+          WARN(log);
+        }
+        return 0;
+      }
+    }
+    size_t codeSize;
+    HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
+    std::vector<char> codec(codeSize);
+    HIPRTC_CHECK(hiprtcGetCode(prog, codec.data()));
+    int value_h = 0;
+    int* ptr_value_h = &value_h;
+    int input_h = Input_Thrd_Vals_int[senario];
+    int* ptr_input_h = &input_h;
+    int* value_d;
+    int* input_d;
+    HIP_CHECK(hipMalloc(&value_d, sizeof(int)));
+    HIP_CHECK(hipMalloc(&input_d, sizeof(int)));
+    HIP_CHECK(hipMemcpy(value_d, ptr_value_h, sizeof(int),
+                        hipMemcpyHostToDevice));
+    HIP_CHECK(hipMemcpy(input_d, ptr_input_h, sizeof(int),
+                        hipMemcpyHostToDevice));
+    void* kernelParam[] = {value_d, input_d};
+    auto size = sizeof(kernelParam);
+    void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
+                                HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
+                                HIP_LAUNCH_PARAM_END};
+    hipModule_t module;
+    hipFunction_t function;
+    HIP_CHECK(hipModuleLoadData(&module, codec.data()));
+    HIP_CHECK(hipModuleGetFunction(&function, module, kername));
+    HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr,
+                          kernel_parameter));
+    HIP_CHECK(hipMemcpy(ptr_value_h, value_d, sizeof(int),
+                        hipMemcpyDeviceToHost));
+    if (*ptr_value_h != Expected_Results_int[senario]) {
+      WARN("Compiler Option : " << appended_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN(" EXPECTED RESULT DOES NOT MATCH FOR " << senario);
+      WARN("th ITERATION (start iteration is 0 ) ");
+      WARN(" INPUT: " << Input_Thrd_Vals_int[senario]);
+      WARN(" EXPECTED OP: "<< Expected_Results_int[senario]);
+      WARN(" OBTAINED OP: "<< *ptr_value_h);
+      return 0;
+    }
+    HIP_CHECK(hipDeviceSynchronize());
+    HIP_CHECK(hipModuleUnload(module));
+    HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
+  }
+  return 1;
+}
+
+bool check_warning(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "warning";
+  std::string retrieved_CO =
+                         get_string_parameters("compiler_option", block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  const char* compiler_option = retrieved_CO.c_str();
+  hiprtcProgram prog;
+  HIPRTC_CHECK(hiprtcCreateProgram(&prog, warning_string, kername,
+                                                0, NULL, NULL));
+  if (Combination_CO_size != -1) {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
+                                                    Combination_CO)};
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler Option : " << compiler_option);
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  } else {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
+                                                    &compiler_option)};
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler Option : " << compiler_option);
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  }
+  size_t logSize;
+  HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+  if (logSize) {
+    std::string log(logSize, '\0');
+    HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+    if (-1 != log.find("#warning")) {
+      WARN("Compiler Option : " << compiler_option);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN(" WARNING MESSAGE IS PRINTING WHICH IS NOT SUPRESSED ");
+      return 0;
+    } else {
+      return 1;
+    }
+  } else {
+    return 1;
+  }
+}
+
+bool check_Rpass_inline(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "Rpass_inline";
+  std::string retrieved_CO =
+                         get_string_parameters("compiler_option", block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  const char* compiler_option = retrieved_CO.c_str();
+  hiprtcProgram prog;
+  HIPRTC_CHECK(hiprtcCreateProgram(&prog, max_thread_string,
+                                                kername, 0, NULL, NULL));
+  if (Combination_CO_size != -1) {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
+                                                    Combination_CO)};
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler Option : " << compiler_option);
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  } else {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
+                                                  &compiler_option)};
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler Option : " << compiler_option);
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  }
+  size_t logSize;
+  HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+  if (logSize) {
+    std::string log(logSize, '\0');
+    HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+    if (log.find("inlined into")) {
+      return 1;
+    } else {
+      WARN("Compiler Option : " << compiler_option);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("EXPECTED STRING 'inlined into' IS NOT PRESENT IN LOG ");
+      return 0;
+    }
+  } else {
+    WARN("Compiler Option : " << compiler_option);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN(" LOG WITH EXPECTED STRING 'inlined into' IS NOT PRESENT ");
+    return 0;
+  }
+}
+
+bool check_conversionerror_enabled(const char** Combination_CO,
+                                int Combination_CO_size, int max_thread_pos,
+                                int fast_math_present) {
+  std::string block_name = "error";
+  picojson::array retrieved_CO = get_array_parameters("compiler_option",
+                                                          block_name);
+  if (retrieved_CO.size() < 4) {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::vector<std::string> CO_vec;
+  for (auto& indx : retrieved_CO) {
+    CO_vec.push_back(indx.get<std::string>());
+  }
+  std::string variable = CO_vec[0];
+  const char* compiler_option = variable.c_str();
+  hiprtcProgram prog;
+  HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string,
+                                                kername, 0, NULL, NULL));
+  if (Combination_CO_size != -1) {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
+                                                    Combination_CO)};
+  } else {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
+                                                     &compiler_option)};
+  }
+  size_t logSize;
+  HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+  if (logSize) {
+    std::string log(logSize, '\0');
+    HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+    std::string variable = "error";
+    if (-1 != log.find(variable)) {
+      return 1;
+    } else {
+      WARN("Compiler Option : " << compiler_option);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("ERROR MSG : '" << variable <<"' NOT FOUND");
+      return 0;
+    }
+  } else {
+    WARN("Compiler Option : " << compiler_option);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("LOG IS NOT GENERATED");
+    WARN("maybe due to presence of '-w' compiler option");
+    return 0;
+  }
+}
+
+bool check_conversionerror_disabled(const char** Combination_CO,
+                                 int Combination_CO_size, int max_thread_pos,
+                                 int fast_math_present) {
+  std::string block_name = "error";
+  picojson::array retrieved_CO = get_array_parameters("compiler_option",
+                                                          block_name);
+  if (retrieved_CO.size() < 4) {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::vector<std::string> CO_vec;
+  for (auto& indx : retrieved_CO) {
+    CO_vec.push_back(indx.get<std::string>());
+  }
+  std::string variable = CO_vec[1];
+  const char* compiler_option = variable.c_str();
+  hiprtcProgram prog;
+  HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string,
+                                                kername, 0, NULL, NULL));
+  if (Combination_CO_size != -1) {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
+                                                    Combination_CO)};
+  } else {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
+                                                     &compiler_option)};
+  }size_t logSize;
+  HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+  if (logSize) {
+    std::string log(logSize, '\0');
+    HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+    if (-1 != log.find("error")) {
+      WARN("Compiler Option : " << compiler_option);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("LOG IS PRESENT WITH ERROR WHICH IS NOT EXPECTED : ");
+      WARN("maybe due to presence of '-w' compiler option");
+      return 0;
+    } else {
+      return 1;
+    }
+  } else {
+    return 1;
+  }
+}
+
+bool check_conversionwarning_enabled(const char** Combination_CO,
+                                   int Combination_CO_size, int max_thread_pos,
+                                   int fast_math_present) {
+  std::string block_name = "error";
+  picojson::array retrieved_CO = get_array_parameters("compiler_option",
+                                                          block_name);
+  if (retrieved_CO.size() < 4) {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::vector<std::string> CO_vec;
+  for (auto& indx : retrieved_CO) {
+    CO_vec.push_back(indx.get<std::string>());
+  }
+  std::string variable = CO_vec[2];
+  const char* compiler_option = variable.c_str();
+  hiprtcProgram prog;
+  HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string,
+                                                kername, 0, NULL, NULL));
+  if (Combination_CO_size != -1) {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
+                                                    Combination_CO)};
+  } else {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
+                                                     &compiler_option)};
+  }size_t logSize;
+  HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+  if (logSize) {
+    std::string log(logSize, '\0');
+    HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+    std::string variable = "warning";
+    if (-1 != log.find(variable)) {
+      return 1;
+    } else {
+      WARN("Compiler Option : " << compiler_option);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("LOG DOESN'T CONTAIN WARNING AS EXP : " << compiler_option);
+      return 0;
+    }
+  } else {
+    WARN("Compiler Option : " << compiler_option);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("LOG IS NOT GENERATED");
+    return 0;
+  }
+}
+
+bool check_conversionwarning_disabled(const char** Combination_CO,
+                                      int Combination_CO_size,
+                                      int max_thread_pos,
+                                      int fast_math_present) {
+  std::string block_name = "error";
+  picojson::array retrieved_CO = get_array_parameters("compiler_option",
+                                                          block_name);
+  if (retrieved_CO.size() < 4) {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::vector<std::string> CO_vec;
+  for (auto& indx : retrieved_CO) {
+    CO_vec.push_back(indx.get<std::string>());
+  }
+  std::string variable = CO_vec[3];
+  const char* compiler_option = variable.c_str();
+  hiprtcProgram prog;
+  HIPRTC_CHECK(hiprtcCreateProgram(&prog, error_string,
+                                                kername, 0, NULL, NULL));
+  if (Combination_CO_size != -1) {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, Combination_CO_size,
+                                                    Combination_CO)};
+  } else {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
+                                                     &compiler_option)};
+  }size_t logSize;
+  HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+  if (logSize) {
+    std::string log(logSize, '\0');
+    HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+    if (-1 != log.find("warning")) {
+      WARN("Compiler Option : " << compiler_option);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("WARNING IS GENERATED WHICH IS NOT EXPECTED");
+      WARN(compiler_option);
+      return 0;
+    } else {
+      return 1;
+    }
+  } else {
+    return 1;
+  }
+}
+
+bool check_max_thread(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "max_thread";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  std::string default_CO = get_string_parameters("kernel_name", block_name);
+  picojson::array Target_Thrd_Vals = get_array_parameters("Target_Vals",
+                                                          block_name);
+  picojson::array Input_Thrd_Vals = get_array_parameters("Input_Vals",
+                                                          block_name);
+  picojson::array Expected_Results = get_array_parameters("Expected_Results",
+                                                          block_name);
+  const char* kername = kernel_name.c_str();
+  std::string compiler_option = get_string_parameters("compiler_option",
+                                                 block_name);
+  if (compiler_option == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::vector<double> double_vec_target;
+  for (auto& indx : Target_Thrd_Vals) {
+    double_vec_target.push_back(indx.get<double>());
+  }
+  std::vector<int> Target_Thrd_Vals_int;
+  for (auto& indx : double_vec_target) {
+    Target_Thrd_Vals_int.push_back(static_cast<int>(indx));
+  }
+  int a = 0;
+  std::vector<std::string> variable(Target_Thrd_Vals_int.size(), "");
+  const char** appended_compiler_options =
+                                 new const char*[Target_Thrd_Vals_int.size()];
+  for (int i = 0; i < Target_Thrd_Vals_int.size() ; i++) {
+    variable[i] = compiler_option + std::to_string(Target_Thrd_Vals_int[i]);
+    appended_compiler_options[i] = variable[i].c_str();
+  }
+  std::vector<double> double_vec_input;
+  for (auto& indx : Input_Thrd_Vals) {
+    double_vec_input.push_back(indx.get<double>());
+  }
+  std::vector<int> Input_Thrd_Vals_int;
+  for (auto& indx : double_vec_input) {
+    Input_Thrd_Vals_int.push_back(static_cast<int>(indx));
+  }
+  std::vector<double> double_vec_expected;
+  for (auto& indx : Expected_Results) {
+    double_vec_expected.push_back(indx.get<double>());
+  }
+  std::vector<int> Expected_Results_int;
+  for (auto& indx : double_vec_expected) {
+    Expected_Results_int.push_back(static_cast<int>(indx));
+  }
+  int pass_count = 0;
+  int inc = (Input_Thrd_Vals_int.size()/Target_Thrd_Vals_int.size());
+  int start = 0;
+  int check, test_case;
+  for (int senario = 0; senario < Target_Thrd_Vals_int.size(); senario++) {
+    if (Target_Thrd_Vals_int[senario] == 0) {
+      check = 0;
+      for (test_case = start; test_case< (start+inc); test_case++) {
+        if (check == Expected_Results_int[test_case]) {
+          pass_count++;
+        }
+      }
+      start+= inc;
+      continue;
+    }
+    hiprtcProgram prog;
+    HIPRTC_CHECK(hiprtcCreateProgram(&prog, max_thread_string,
+                                                   kername, 0, NULL, NULL));
+    if (Combination_CO_size != -1) {
+      std::string max_thread_string = variable[senario];
+      Combination_CO[max_thread_pos] = max_thread_string.c_str();
+      hiprtcResult compileResult{hiprtcCompileProgram(prog,
+                                                      Combination_CO_size,
+                                                      Combination_CO)};
+      if (!(compileResult == HIPRTC_SUCCESS)) {
+        WARN("Compiler Option : " << appended_compiler_options[senario]);
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+        WARN("hiprtcCompileProgram() api failed!! with error code: ");
+        WARN(compileResult);
+        size_t logSize;
+        HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+        if (logSize) {
+          std::string log(logSize, '\0');
+          HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+          WARN(log);
+        }
+        return 0;
+      }
+    } else {
+      hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
+                                       &appended_compiler_options[senario])};
+      if (!(compileResult == HIPRTC_SUCCESS)) {
+        WARN("Compiler Option : " << appended_compiler_options[senario]);
+        WARN("hiprtcCompileProgram() api failed!! with error code: ");
+        WARN(compileResult);
+        size_t logSize;
+        HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+        if (logSize) {
+          std::string log(logSize, '\0');
+          HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+          WARN(log);
+        }
+        return 0;
+      }
+    }
+    size_t codeSize;
+    HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
+    std::vector<char> codec(codeSize);
+    HIPRTC_CHECK(hiprtcGetCode(prog, codec.data()));
+    for (test_case = start; test_case< (start+inc); test_case++) {
+      int num_threads_h = 0;
+      int* ptr_num_threads_h = &num_threads_h;
+      int* Thread_count_d;
+      HIP_CHECK(hipMalloc(&Thread_count_d, sizeof(int)));
+      HIP_CHECK(hipMemcpy(Thread_count_d, ptr_num_threads_h, sizeof(int),
+                hipMemcpyHostToDevice));
+      void* kernelParam[] = {Thread_count_d};
+      auto size = sizeof(kernelParam);
+      void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
+                          HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
+                          HIP_LAUNCH_PARAM_END};
+      hipModule_t module;
+      hipFunction_t function;
+      HIP_CHECK(hipModuleLoadData(&module, codec.data()));
+      HIP_CHECK(hipModuleGetFunction(&function, module, kername));
+      hipError_t status = hipModuleLaunchKernel(function, 1, 1, 1,
+                                                Input_Thrd_Vals_int[test_case],
+                                                1, 1, 0, 0, nullptr,
+                                                kernel_parameter);
+      HIP_CHECK(hipMemcpy(ptr_num_threads_h, Thread_count_d, sizeof(int),
+                          hipMemcpyDeviceToHost));
+      if ((status == hipSuccess) &&
+          (num_threads_h <= Target_Thrd_Vals_int[senario])) {
+        check = 1;
+      } else {
+        check = 0;
+      }
+      if (check != Expected_Results_int[test_case]) {
+        WARN("Compiler Option : " << appended_compiler_options[senario]);
+        if (Combination_CO_size != -1) {
+          WARN("FAILED IN COMBINATION :");
+          std::string max_thread_string = variable[senario];
+          Combination_CO[max_thread_pos] = max_thread_string.c_str();
+          for (int i = 0; i < Combination_CO_size; i++) {
+            WARN(Combination_CO[i]);
+          }
+        }
+        WARN("EXPECTED RESULT DOES NOT MATCH FOR " << test_case);
+        WARN("th ITERATION (start iteration is 0 ) ");
+        WARN("IP THREAD VAL: " << Input_Thrd_Vals_int[test_case]);
+        WARN("EXPECTED OP: "<< Expected_Results_int[test_case]);
+        WARN("OBTAINED OP: "<< check);
+        return 0;
+      }
+      HIP_CHECK(hipDeviceSynchronize());
+      HIP_CHECK(hipModuleUnload(module));
+    }
+    start+=inc;
+    HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
+  }
+  return 1;
+}
+
+bool check_unsafe_atomic_enabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "unsafe_atomic";
+  std::string compiler_option = get_string_parameters("compiler_option",
+                                                 block_name);
+  if (compiler_option == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  const char *compiler_option_cstr = compiler_option.c_str();
+  float *A_d;
+  const int N = 1000;
+  float A_h[N];
+  float Nbytes = N * sizeof(float);
+  double sum_w = 0, sum_wo = 0, sum_tocheck = 0;
+  for (int i = 0; i < N; i++) {
+    A_h[i] = 0.1f;
+    sum_tocheck += A_h[i] + 0.2f;
+  }
+  HIP_CHECK(hipMalloc(&A_d, Nbytes));
+  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
+  for (int senario = 0; senario < 2; senario ++) {
+    hiprtcProgram prog;
+    HIPRTC_CHECK(hiprtcCreateProgram(&prog, unsafe_atomic_string,
+                                                  kername, 0, NULL, NULL));
+    if (Combination_CO_size != -1) {
+      hiprtcResult compileResult{hiprtcCompileProgram(prog,
+                                                      Combination_CO_size,
+                                                      Combination_CO)};
+      if (!(compileResult == HIPRTC_SUCCESS)) {
+        WARN("Compiler Option : " << compiler_option);
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+        WARN("hiprtcCompileProgram() api failed!! with error code: ");
+        WARN(compileResult);
+        size_t logSize;
+        HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+        if (logSize) {
+          std::string log(logSize, '\0');
+          HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+          WARN(log);
+        }
+        return 0;
+      }
+    } else {
+      hiprtcResult compileResult{hiprtcCompileProgram(prog, 1,
+                                                      &compiler_option_cstr)};
+      if (!(compileResult == HIPRTC_SUCCESS)) {
+        WARN("Compiler Option : " << compiler_option);
+        WARN("hiprtcCompileProgram() api failed!! with error code: ");
+        WARN(compileResult);
+          size_t logSize;
+        HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+        if (logSize) {
+          std::string log(logSize, '\0');
+          HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+          WARN(log);
+        }
+        return 0;
+      }
+    }
+    size_t codeSize;
+    HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
+    std::vector<char> codec(codeSize);
+    HIPRTC_CHECK(hiprtcGetCode(prog, codec.data()));
+    void* kernelParam[] = {A_d};
+    auto size = sizeof(kernelParam);
+    void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
+                                HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
+                                HIP_LAUNCH_PARAM_END};
+    hipModule_t module;
+    hipFunction_t function;
+    HIP_CHECK(hipModuleLoadData(&module, codec.data()));
+    HIP_CHECK(hipModuleGetFunction(&function, module, kername));
+    HIP_CHECK(hipModuleLaunchKernel(function, N, 1, 1, N, 1, 1, 0, 0,
+                                    nullptr, kernel_parameter));
+    HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
+    for (int i = 0; i < N; i++) {
+      if (senario == 0) {
+        sum_wo += A_h[i];
+      } else {
+        sum_w += A_h[i];
+      }
+    }
+    HIP_CHECK(hipDeviceSynchronize());
+    HIP_CHECK(hipModuleUnload(module));
+    HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
+  }
+  if (sum_w != sum_tocheck) {
+    return 1;
+  } else {
+    WARN("Compiler Option : " << compiler_option);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("EXPECTED : " << sum_w << " != " << sum_tocheck);
+    return 0;
+  }
+}
+
+bool check_unsafe_atomic_disabled(const char** Combination_CO,
+                                 int Combination_CO_size, int max_thread_pos,
+                                 int fast_math_present) {
+  std::string block_name = "unsafe_atomic";
+  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  const char* compiler_option = retrieved_CO.c_str();
+  float *A_d;
+  const int N = 1000;
+  float A_h[N];
+  float Nbytes = N * sizeof(float);
+  double sum = 0, sum_tocheck = 0;
+  for (int i = 0; i < N; i++) {
+    A_h[i] = 0.1f;
+    sum_tocheck += A_h[i] + 0.2f;
+  }
+  HIP_CHECK(hipMalloc(&A_d, Nbytes));
+  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
+  hiprtcProgram prog;
+  HIPRTC_CHECK(hiprtcCreateProgram(&prog, unsafe_atomic_string,
+                                                kername, 0, NULL, NULL));
+  if (Combination_CO_size != -1) {
+      hiprtcResult compileResult{hiprtcCompileProgram(prog,
+                                                      Combination_CO_size,
+                                                      Combination_CO)};
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler Option : " << compiler_option);
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  } else {
+    hiprtcResult compileResult{hiprtcCompileProgram(prog, 1, &compiler_option)};
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler Option : " << compiler_option);
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return 0;
+    }
+  }
+  size_t codeSize;
+  HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
+  std::vector<char> codec(codeSize);
+  HIPRTC_CHECK(hiprtcGetCode(prog, codec.data()));
+  void* kernelParam[] = {A_d};
+  auto size = sizeof(kernelParam);
+  void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
+                              HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
+                              HIP_LAUNCH_PARAM_END};
+  hipModule_t module;
+  hipFunction_t function;
+  HIP_CHECK(hipModuleLoadData(&module, codec.data()));
+  HIP_CHECK(hipModuleGetFunction(&function, module, kername));
+  HIP_CHECK(hipModuleLaunchKernel(function, N, 1, 1, N, 1, 1, 0, 0,
+                                  nullptr, kernel_parameter));
+  HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
+  for (int i = 0; i < N; i++) {
+    sum += A_h[i];
+  }
+  HIP_CHECK(hipDeviceSynchronize());
+  HIP_CHECK(hipModuleUnload(module));
+  HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
+  if (sum == sum_tocheck) {
+    return 1;
+  } else {
+    WARN("Compiler Option : " << compiler_option);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("EXPECTED RESULT IS NOT OBTAINED ");
+    WARN("EXPECTED RESULT: "<< sum_tocheck);
+    WARN("OBTAINED RESULT: "<< sum);
+    return 0;
+  }
+}
+
+bool check_infinite_num_enabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "infinite_num";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string retrieved_CO = get_string_parameters("compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  int CO_IRadded_size = 3, a = 0;
+  const char** CO_IRadded = new const char*[3];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                 Combination_CO, Combination_CO_size);
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (fast_math_present != -1) {
+    if (fast_math_present == 0 && data.find("contract") != -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+     }
+      WARN("IR DOESN'T CONTAIN 'contract' ");
+      return 0;
+    }
+  } else {
+    if (data.find("ninf")!= -1) {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+     }
+      WARN("IR DOESN'T CONTAIN 'ninf' ");
+      return 0;
+    } else {
+      return 1;
+    }
+  }
+}
+
+bool check_infinite_num_disabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "infinite_num";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  int CO_IRadded_size = 3, a = 0;
+  const char** CO_IRadded = new const char*[3];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                 Combination_CO, Combination_CO_size);
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (fast_math_present != -1) {
+    if (fast_math_present == 1 && data.find("fmul fast")!= -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+     }
+      WARN("IR DOESN'T CONTAIN 'fmul fast' ");
+      return 0;
+    }
+  } else {
+    if (data.find("ninf")!= -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN 'ninf' ");
+      return 0;
+    }
+  }
+}
+
+bool check_NAN_num_enabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "NAN_num";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string retrieved_CO = get_string_parameters("compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME ");
+    WARN(block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  int CO_IRadded_size = 3, a = 0;
+  const char** CO_IRadded = new const char*[3];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                 Combination_CO, Combination_CO_size);
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (fast_math_present!= -1) {
+    if (fast_math_present == 0 && data.find("contract")!= -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN 'contract' ");
+      return 0;
+    }
+  } else {
+    if (data.find("nnan")!= -1) {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN 'nnan' ");
+      return 0;
+    } else {
+      return 1;
+    }
+  }
+}
+
+bool check_NAN_num_disabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "NAN_num";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  int CO_IRadded_size = 3, a = 0;
+  const char** CO_IRadded = new const char*[3];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                 Combination_CO, Combination_CO_size);
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (fast_math_present!= -1) {
+    if (fast_math_present == 1 && data.find("fmul fast")!= -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN 'fmul fast' ");
+      return 0;
+    }
+  } else {
+    if (data.find("nnan")!= -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN 'nnan' ");
+      return 0;
+    }
+  }
+}
+
+bool check_finite_math_enabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "finite_math";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string retrieved_CO = get_string_parameters("compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  int CO_IRadded_size = 3, a = 0;
+  const char** CO_IRadded = new const char*[3];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                 Combination_CO, Combination_CO_size);
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (fast_math_present!= -1) {
+    if (fast_math_present == 1 && data.find("fmul fast")!= -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN 'fmul fast'");
+      return 0;
+    }
+  } else {
+    if (data.find("nnan")!= -1 && (data.find("ninf") != -1)) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN 'nnan' or 'ninf' or both ");
+      return 0;
+    }
+  }
+}
+
+bool check_finite_math_disabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "finite_math";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  int CO_IRadded_size = 3, a = 0;
+  const char** CO_IRadded = new const char*[3];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                 Combination_CO, Combination_CO_size);
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (fast_math_present!= -1) {
+    if (fast_math_present == 0 && data.find("contract")!= -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN 'contract'");
+      return 0;
+    }
+  } else {
+    if (data.find("nnan")!= -1 && (data.find("ninf") != -1)) {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR CONTAIN 'nnan' or 'ninf' or both WHICH IS NOT EXPECTED ");
+      return 0;
+    } else {
+      return 1;
+    }
+  }
+}
+
+bool check_associative_math_enabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "associative_math";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string retrieved_CO = get_string_parameters("compiler_option",
+                                                  block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  int CO_IRadded_size = 4, a = 0;
+  const char** CO_IRadded = new const char*[4];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-fno-signed-zeros";
+  CO_IRadded[2] = "-mllvm";
+  CO_IRadded[3] = "-print-after=constmerge";
+  std::string data;
+  if (Combination_CO_size != -1) {
+    int Combination_CO_IRadded_size = Combination_CO_size+1;
+    int b = 0;
+    std::vector<std::string> add_ir_forcombi(Combination_CO_size + 1, "");
+    const char** Combination_CO_IRadded =
+                                   new const char*[Combination_CO_size+1];
+    for (int i = 0; i < Combination_CO_size+1; ++i) {
+      if (i == Combination_CO_size) {
+        Combination_CO_IRadded[i] = "-fno-signed-zeros";
+        break;
+      }
+      add_ir_forcombi[i] = Combination_CO[b];
+      Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str();
+      b++;
+    }
+    data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                                      Combination_CO_IRadded,
+                                                Combination_CO_IRadded_size);
+  } else {
+    data = checking_IR(kername, CO_IRadded, CO_IRadded_size, Combination_CO,
+                                                         Combination_CO_size);
+  }
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (fast_math_present!= -1) {
+    if (fast_math_present == 1 && data.find("fmul fast")!= -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN 'fmul fast' ");
+      return 0;
+    }
+  } else {
+    if (data.find("reassoc") != -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN 'reassoc' ");
+      WARN(data);
+      return 0;
+    }
+  }
+}
+
+bool check_associative_math_disabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "associative_math";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
+                                                  block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  int CO_IRadded_size = 4, a = 0;
+  const char** CO_IRadded = new const char*[4];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-fno-signed-zeros";
+  CO_IRadded[2] = "-mllvm";
+  CO_IRadded[3] = "-print-after=constmerge";
+  std::string data;
+  if (Combination_CO_size != -1) {
+    int Combination_CO_IRadded_size = Combination_CO_size+1;
+    int b = 0;
+    std::vector<std::string> add_ir_forcombi(Combination_CO_size + 1, "");
+    const char** Combination_CO_IRadded =
+                                   new const char*[Combination_CO_size+1];
+    for (int i = 0; i < Combination_CO_size+1; ++i) {
+      if (i == Combination_CO_size) {
+        Combination_CO_IRadded[i] = "-fno-signed-zeros";
+        break;
+      }
+      add_ir_forcombi[i] = Combination_CO[b];
+      Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str();
+      b++;
+    }
+    data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                                      Combination_CO_IRadded,
+                                                Combination_CO_IRadded_size);
+  } else {
+    data = checking_IR(kername, CO_IRadded, CO_IRadded_size, Combination_CO,
+                                                         Combination_CO_size);
+  }
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (fast_math_present!= -1) {
+    if (fast_math_present == 0 && data.find("contract")!= -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN 'contract' ");
+      return 0;
+    }
+  } else {
+    if (data.find("reassoc")!= -1) {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR CONTAIN 'reassoc' WHICH IS NOT EXPECTED ");
+      return 0;
+    } else {
+      return 1;
+    }
+  }
+}
+
+bool check_signed_zeros_enabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "signed_zeros";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string retrieved_CO = get_string_parameters("compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  int CO_IRadded_size = 3, a = 0;
+  const char** CO_IRadded = new const char*[3];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                 Combination_CO, Combination_CO_size);
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (fast_math_present!= -1) {
+    if (fast_math_present == 0 && data.find("contract")!= -1) {
+      return 1;
+    } else {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR DOESN'T CONTAIN 'contract' ");
+    return 0;
+    }
+  } else {
+    if (data.find("nsz") != -1) {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR CONTAIN 'nsz' WHICH IS NOT EXPECTED ");
+      return 0;
+    } else {
+      return 1;
+    }
+  }
+}
+
+bool check_signed_zeros_disabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "signed_zeros";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  int CO_IRadded_size = 3, a = 0;
+  const char** CO_IRadded = new const char*[3];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                 Combination_CO, Combination_CO_size);
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (fast_math_present!= -1) {
+    if (fast_math_present == 1 && data.find("fmul fast")!= -1) {
+      return 1;
+    } else {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR DOESN'T CONTAIN 'fmul fast' ");
+    return 0;
+    }
+  } else {
+    if (data.find("nsz") != -1) {
+      return 1;
+    } else {
+      WARN("Compiler option : " << retrieved_CO);
+      if (Combination_CO_size != -1) {
+        WARN("FAILED IN COMBINATION :");
+        for (int i = 0; i < Combination_CO_size; i++) {
+          WARN(Combination_CO[i]);
+        }
+      }
+      WARN("IR DOESN'T CONTAIN 'nsz' ");
+      return 0;
+    }
+  }
+}
+
+bool check_trapping_math_enabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "trapping_math";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string retrieved_CO = get_string_parameters("compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  int CO_IRadded_size = 3, a = 0;
+  const char** CO_IRadded = new const char*[3];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                 Combination_CO, Combination_CO_size);
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (data.find("\"no-trapping-math\"=\"true\"") != -1) {
+    return 1;
+  } else {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR DOESN'T CONTAIN '\"no-trapping-math\"=\"true\"'");
+    return 0;
+  }
+}
+
+bool check_trapping_math_disabled(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present) {
+  std::string block_name = "trapping_math";
+  std::string kernel_name = get_string_parameters("kernel_name", block_name);
+  const char* kername = kernel_name.c_str();
+  std::string retrieved_CO = get_string_parameters("reverse_compiler_option",
+                                                 block_name);
+  if (retrieved_CO == "") {
+    WARN("COMPILER OPTION NOT PROVIDED FOR BLOCK NAME " << block_name);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    return 0;
+  }
+  int CO_IRadded_size = 3, a = 0;
+  const char** CO_IRadded = new const char*[3];
+  CO_IRadded[0] = retrieved_CO.c_str();
+  CO_IRadded[1] = "-mllvm";
+  CO_IRadded[2] = "-print-after=constmerge";
+  std::string data = checking_IR(kername, CO_IRadded, CO_IRadded_size,
+                                 Combination_CO, Combination_CO_size);
+  if (data == "") {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR NOT GENERATED");
+    return 0;
+  }
+  if (data.find("\"no-trapping-math\"=\"true\"") != -1) {
+    return 1;
+  } else {
+    WARN("Compiler option : " << retrieved_CO);
+    if (Combination_CO_size != -1) {
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+    }
+    WARN("IR DOESN'T CONTAIN '\"no-trapping-math\"=\"true\"'");
+    return 0;
+  }
+}
+
+std::string checking_IR(const char* kername, const char** extra_CO_IRadded,
+                    int extra_CO_IRadded_size, const char** Combination_CO,
+                    int Combination_CO_size) {
+  float *A_d, *B_d, *C_d;
+  float *A_h, *B_h, *C_h, *result;
+  float Nbytes = sizeof(float);
+  A_h = new float[1];
+  B_h = new float[1];
+  C_h = new float[1];
+  result = new float[1];
+  for (int i = 0; i < 1; i++) {
+    A_h[i] = 0.1f;
+    B_h[i] = 0.1f;
+    C_h[i] = 0.1f;
+    result[i] = 0.2f;
+  }
+  HIP_CHECK(hipMalloc(&A_d, Nbytes));
+  HIP_CHECK(hipMalloc(&B_d, Nbytes));
+  HIP_CHECK(hipMalloc(&C_d, Nbytes));
+  HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
+  HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
+  HIP_CHECK(hipMemcpy(C_d, C_h, Nbytes, hipMemcpyHostToDevice));
+  hiprtcProgram prog;
+  HIPRTC_CHECK(hiprtcCreateProgram(&prog, ffp_contract_string,
+                                                kername, 0, NULL, NULL));
+  int Combination_CO_IRadded_size;
+  CaptureStream capture(stderr);
+  if (Combination_CO_size != -1) {
+    Combination_CO_IRadded_size = Combination_CO_size+2;
+    int b = 0;
+    std::vector<std::string> add_ir_forcombi(Combination_CO_size + 2, "");
+    const char** Combination_CO_IRadded =
+                                  new const char*[Combination_CO_size+2];
+    for (int i = 0; i < Combination_CO_size+2; ++i) {
+      if (i == Combination_CO_size) {
+        Combination_CO_IRadded[i] = "-mllvm";
+        Combination_CO_IRadded[i+1] = "-print-after=constmerge";
+        break;
+      }
+      add_ir_forcombi[i] = Combination_CO[b];
+      Combination_CO_IRadded[i] = add_ir_forcombi[i].c_str();
+      b++;
+    }
+    capture.Begin();
+    hiprtcResult compileResult{hiprtcCompileProgram(prog,
+                                                   Combination_CO_IRadded_size,
+                                                    Combination_CO_IRadded)};
+    capture.End();
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("Compiler option : " <<  extra_CO_IRadded[0]);
+      WARN("FAILED IN COMBINATION :");
+      for (int i = 0; i < Combination_CO_size; i++) {
+        WARN(Combination_CO[i]);
+      }
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return "";
+    }
+  } else {
+    capture.Begin();
+    hiprtcResult compileResult{hiprtcCompileProgram(prog,
+                                                    extra_CO_IRadded_size,
+                                                    extra_CO_IRadded)};
+    capture.End();
+    if (!(compileResult == HIPRTC_SUCCESS)) {
+      WARN("hiprtcCompileProgram() api failed!! with error code: ");
+      WARN(compileResult);
+      size_t logSize;
+      HIPRTC_CHECK(hiprtcGetProgramLogSize(prog, &logSize));
+      if (logSize) {
+        std::string log(logSize, '\0');
+        HIPRTC_CHECK(hiprtcGetProgramLog(prog, &log[0]));
+        WARN(log);
+      }
+      return"";
+    }
+  }
+  size_t codeSize;
+  HIPRTC_CHECK(hiprtcGetCodeSize(prog, &codeSize));
+  std::vector<char> codec(codeSize);
+  HIPRTC_CHECK(hiprtcGetCode(prog, codec.data()));
+  void* kernelParam[] = {A_d, B_d, C_d};
+  auto size = sizeof(kernelParam);
+  void* kernel_parameter[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &kernelParam,
+                              HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
+                              HIP_LAUNCH_PARAM_END};
+  hipModule_t module;
+  hipFunction_t function;
+  HIP_CHECK(hipModuleLoadData(&module, codec.data()));
+  HIP_CHECK(hipModuleGetFunction(&function, module, kername));
+  HIP_CHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, nullptr,
+                                  kernel_parameter));
+  HIP_CHECK(hipMemcpy(result, C_d, Nbytes, hipMemcpyDeviceToHost));
+  for (int i = 0; i< 1; i++) {
+    if (result[i] != ((A_h[i] * B_h[i]) + C_h[i])) {
+      return "";
+    }
+  }
+  std::string data = capture.getData();
+  std::stringstream dataStream;
+  HIP_CHECK(hipModuleUnload(module));
+  HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
+  return data;
+}
diff --git a/projects/hip-tests/catch/unit/rtc/RtcUtility.cpp b/projects/hip-tests/catch/unit/rtc/RtcUtility.cpp
index 3caccdb4a5..cc89f5b4cf 100644
--- a/projects/hip-tests/catch/unit/rtc/RtcUtility.cpp
+++ b/projects/hip-tests/catch/unit/rtc/RtcUtility.cpp
@@ -1,508 +1,508 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sindxl
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-This file has definition of functions for the following functinality:
-
-1) get_combi_string_vec() : Retrieve the combination string which contains
-contains the combination of block name which indicate the respective compiler
-option seperated by ':' from RtcConfig.jason file and returns them in the
-form of vectors.
-
-2) split_comb_string() : The combination of blockname which are seperated by
-':' has to split so that their respective compiler option can be retrieved
-from the json file. This functn internally calls calling_combination_function()
-for each of the combination of compiler options. This function returns a
-int value i.e the total failed cases in that combination which is obtained
-by calling_combination_function() function.
-
-3) calling_combination_function() : This function takes the combination of
-blockname as the input. The respective compiler option for that block name is
-retrieved from the json file and store the compiler options in a array.
-calling_resp_function() is called which mapps the compiler option function
-which has to be called with a set of required parameters
-(combination of compiler options is one among them). this function returns
-the status of execution ie 1 or 0 (bool).
-
-4) getblock_fromconfig() : This function is used to open the RtcConfig.json
-file and return the blocks.
-
-5) get_string_parameters() and get_array_parameters() : retrieved the
-parameters of the respective block name.
-
-*/
-
-#include <hip/hiprtc.h>
-#include <hip/hip_runtime.h>
-#include <picojson.h>
-#include <vector>
-#include <string>
-#include <fstream>
-#include <iostream>
-#include "headers/RtcUtility.h"
-#include "headers/RtcFunctions.h"
-#include "headers/RtcKernels.h"
-#include <hip_test_common.hh>
-#include "headers/printf_common.h"
-
-#pragma clang diagnostic ignored "-Wunused-but-set-variable"
-
-std::vector<std::string> get_combi_string_vec() {
-  picojson::array combi_string = get_array_parameters("Combi_CO",
-                                                      "all_compier_options");
-  std::vector<std::string> combi_string_list;
-  for (auto& indx : combi_string) {
-    combi_string_list.push_back(indx.get<std::string>());
-  }
-  return combi_string_list;
-}
-
-int split_comb_string(std::string option) {
-  int start_collon_index = option.find(':');
-  int start_index = 0;
-  std::vector<std::string> combi_block_name;
-  while (start_collon_index != std::string::npos) {
-    std::string singleoption = option.substr(start_index,
-                               start_collon_index - start_index);
-    combi_block_name.push_back(singleoption);
-    start_index = start_collon_index + 1;
-    start_collon_index = option.find(':', start_index);
-  }
-  std::string last_option = option.substr(start_index,
-                            option.length() - start_index);
-  combi_block_name.push_back(last_option);
-  return calling_combination_function(combi_block_name);
-}
-
-int calling_combination_function(std::vector<std::string> combi_vec_list) {
-  int combi_size = combi_vec_list.size();
-  int fast_math_present = -1, undef_present = 0;
-  int max_thread_position;
-  std::vector<std::string> hold_CO(combi_size, "");
-  const char** Combination_CO = new const char*[combi_size];
-  picojson::array undef_compiler_option = get_array_parameters(
-                                          "compiler_option", "undef_macro");
-  std::vector<std::string> undef_CO_vec;
-  for (auto& indx : undef_compiler_option) {
-    undef_CO_vec.push_back(indx.get<std::string>());
-  }
-  for (int i=0; i< combi_size; i++) {
-    if (combi_vec_list[i] == "max_thread") {
-      std::string ready_CO = get_string_parameters("ready_compiler_option",
-                                                    combi_vec_list[i]);
-      hold_CO[i] = ready_CO;
-      if (combi_vec_list[i] == "max_thread") {
-        max_thread_position = i;
-      }
-    } else if (combi_vec_list[i] == "header_dir") {
-      std::string retrived_CO = get_string_parameters("compiler_option",
-                                                      "header_dir");
-      std::string str = "pwd";
-      const char *cmd = str.c_str();
-      CaptureStream capture(stdout);
-      capture.Begin();
-      system(cmd);
-      capture.End();
-      std::string wor_dir = capture.getData();
-      std::string break_dir = wor_dir.substr(0, wor_dir.find("build"));
-      std::string append_str = "catch/unit/rtc/headers";
-      std::string CO = retrived_CO + " " + break_dir + append_str;
-      hold_CO[i] = CO;
-    } else if (combi_vec_list[i] == "architecture") {
-      std::string retrived_CO = get_string_parameters("compiler_option",
-                                                      "architecture");
-      hipDeviceProp_t prop;
-      HIP_CHECK(hipGetDeviceProperties(&prop, 0));
-      std::string actual_architecture = prop.gcnArchName;
-      std::string complete_CO = retrived_CO + actual_architecture;
-      hold_CO[i] = complete_CO;
-    } else if (check_positive_CO_present(combi_vec_list[i]) == 1) {
-      std::string positive_CO = get_string_parameters("compiler_option",
-                                                      combi_vec_list[i]);
-      hold_CO[i] = positive_CO;
-      if (combi_vec_list[i] == "fast_math")
-        fast_math_present = 1;
-    } else if (check_negative_CO_present(combi_vec_list[i]) == 1) {
-      std::string split_block_name = combi_vec_list[i].substr(3,
-                                               combi_vec_list[i].length() - 3);
-      std::string negative_CO = get_string_parameters(
-                                "reverse_compiler_option", split_block_name);
-      hold_CO[i] = negative_CO;
-      if (split_block_name == "fast_math")
-        fast_math_present = 0;
-    } else if ( combi_vec_list[i] == "conversion_error"
-             || combi_vec_list[i] == "conversion_no_error"
-             || combi_vec_list[i] == "conversion_no_warning"
-             || combi_vec_list[i] == "conversion_warning") {
-      picojson::array compiler_option = get_array_parameters("compiler_option",
-                                                             "error");
-      std::vector<std::string> CO_vec;
-      for (auto& indx : compiler_option) {
-        CO_vec.push_back(indx.get<std::string>());
-      }
-      if (combi_vec_list[i] == "conversion_error") {
-        hold_CO[i] = CO_vec[0];
-      } else if (combi_vec_list[i] == "conversion_no_error") {
-        hold_CO[i] = CO_vec[1];
-      } else if (combi_vec_list[i] == "conversion_warning") {
-        hold_CO[i] = CO_vec[2];
-      } else if (combi_vec_list[i] == "conversion_no_warning") {
-        hold_CO[i] = CO_vec[3];
-      }
-    } else if (combi_vec_list[i] == "off_ffp_contract"
-             || combi_vec_list[i] == "on_ffp_contract"
-             || combi_vec_list[i] == "fast_ffp_contract"
-             || combi_vec_list[i] == "pragmas_ffp_contract") {
-      picojson::array compiler_option = get_array_parameters("compiler_option",
-                                                             "ffp_contract");
-      std::vector<std::string> CO_vec;
-      for (auto& indx : compiler_option) {
-        CO_vec.push_back(indx.get<std::string>());
-      }
-      if (combi_vec_list[i] == "off_ffp_contract") {
-        hold_CO[i] = CO_vec[0];
-      } else if (combi_vec_list[i] == "on_ffp_contract") {
-        hold_CO[i] = CO_vec[1];
-      } else if (combi_vec_list[i] == "fast_ffp_contract") {
-        hold_CO[i] = CO_vec[2];
-      } else if (combi_vec_list[i] == "pragmas_ffp_contract") {
-        hold_CO[i] = CO_vec[3];
-      }
-    } else if (combi_vec_list[i] =="undef_macro") {
-      hold_CO[i] = undef_CO_vec[1].c_str();
-      undef_present = 1;
-    } else {
-      WARN("BLOCK NAME " << combi_vec_list[i] << " NOT PRESENT");
-    }
-    Combination_CO[i] = hold_CO[i].c_str();
-  }
-  int errors = 0;
-  for (int j = 0; j< combi_size; j++) {
-    std::string block_name = combi_vec_list[j].c_str();
-    if (!calling_resp_function(block_name, Combination_CO, combi_size,
-                               max_thread_position, fast_math_present)) {
-      errors++;
-    }
-    Combination_CO[j] = hold_CO[j].c_str();
-  }
-  return errors;
-}
-
-int check_positive_CO_present(std::string find_string) {
-  static std::vector<std::string> positive_CO = {"macro", "warning", "rdc",
-                                                 "denormals", "fp32_div_sqrt",
-                                                 "Rpass_inline", "fast_math",
-                                                 "slp_vectorize",
-                                                 "amdgpu_ieee",
-                                                 "unsafe_atomic",
-                                                 "infinite_num", "NAN_num",
-                                                 "slp_vectorize", "math_errno",
-                                                 "associative_math",
-                                                 "signed_zeros", "finite_math",
-                                                 "trapping_math"};
-  if (std::find(positive_CO.begin(), positive_CO.end(),
-      find_string) != positive_CO.end())
-    return 1;
-  else
-    return 0;
-}
-
-int check_negative_CO_present(std::string find_string) {
-  static std::vector<std::string> negative_CO = {"no_fast_math",
-                                                 "no_fp32_div_sqrt",
-                                                 "no_denormals",
-                                                 "no_slp_vectorize",
-                                                 "no_amdgpu_ieee",
-                                                 "no_unsafe_atomic",
-                                                 "no_infinite_num",
-                                                 "no_slp_vectorize",
-                                                 "no_NAN_num",
-                                                 "no_math_errno",
-                                                 "no_associative_math",
-                                                 "no_signed_zeros",
-                                                 "no_finite_math",
-                                                 "no_trapping_math"};
-  if (std::find(negative_CO.begin(), negative_CO.end(),
-      find_string) != negative_CO.end())
-    return 1;
-  else
-    return 0;
-}
-
-bool  calling_resp_function(const std::string block_name,
-                           const char** Combination_CO,
-                           int Combination_CO_size, int max_thread_position,
-                           int fast_math_present) {
-  if (block_name == "max_thread") {
-    return check_max_thread(Combination_CO, Combination_CO_size,
-                            max_thread_position, fast_math_present);
-  } else if (block_name == "architecture") {
-    return check_architecture(Combination_CO, Combination_CO_size,
-                              max_thread_position, fast_math_present);
-  } else if (block_name == "rdc") {
-    return check_rdc(Combination_CO, Combination_CO_size,
-                     max_thread_position, fast_math_present);
-  } else if (block_name == "denormals") {
-    return check_denormals_enabled(Combination_CO, Combination_CO_size,
-                                   max_thread_position, fast_math_present);
-  } else if (block_name == "no_denormals") {
-    return check_denormals_disabled(Combination_CO, Combination_CO_size,
-                                    max_thread_position, fast_math_present);
-  } else if (block_name == "warning") {
-    return check_warning(Combination_CO, Combination_CO_size,
-                         max_thread_position, fast_math_present);
-  } else if (block_name == "conversion_error") {
-    return check_conversionerror_enabled(Combination_CO, Combination_CO_size,
-                                         max_thread_position,
-                                         fast_math_present);
-  } else if (block_name == "conversion_no_error") {
-    return check_conversionerror_disabled(Combination_CO, Combination_CO_size,
-                                          max_thread_position,
-                                          fast_math_present);
-  } else if (block_name == "conversion_warning") {
-    return check_conversionwarning_enabled(Combination_CO, Combination_CO_size,
-                                           max_thread_position,
-                                           fast_math_present);
-  } else if (block_name == "conversion_no_warning") {
-    return check_conversionwarning_disabled(Combination_CO,
-                                            Combination_CO_size,
-                                            max_thread_position,
-                                            fast_math_present);
-  } else if (block_name == "Rpass_inline") {
-    return check_Rpass_inline(Combination_CO, Combination_CO_size,
-                              max_thread_position, fast_math_present);
-  } else if (block_name == "macro") {
-    return check_macro(Combination_CO, Combination_CO_size,
-                       max_thread_position, fast_math_present);
-  } else if (block_name == "undef_macro") {
-    return check_undef_macro(Combination_CO, Combination_CO_size,
-                             max_thread_position, fast_math_present);
-  } else if (block_name == "header_dir") {
-    return check_header_dir(Combination_CO, Combination_CO_size,
-                            max_thread_position, fast_math_present);
-  } else if (block_name == "no_fast_math") {
-    return check_fast_math_disabled(Combination_CO, Combination_CO_size,
-                                    max_thread_position, fast_math_present);
-  } else if (block_name == "fast_math") {
-    return check_fast_math_enabled(Combination_CO, Combination_CO_size,
-                                   max_thread_position, fast_math_present);
-  } else if (block_name == "off_ffp_contract") {
-    return check_ffp_contract_off(Combination_CO, Combination_CO_size,
-                                  max_thread_position, fast_math_present);
-  } else if (block_name == "on_ffp_contract") {
-    return check_ffp_contract_on(Combination_CO, Combination_CO_size,
-                                 max_thread_position, fast_math_present);
-  } else if (block_name == "fast_ffp_contract") {
-    return check_ffp_contract_fast(Combination_CO, Combination_CO_size,
-                                   max_thread_position, fast_math_present);
-  } else if (block_name == "no_unsafe_atomic") {
-    return check_unsafe_atomic_disabled(Combination_CO, Combination_CO_size,
-                                        max_thread_position,
-                                        fast_math_present);
-  } else if (block_name == "unsafe_atomic") {
-    return check_unsafe_atomic_enabled(Combination_CO, Combination_CO_size,
-                                       max_thread_position,
-                                       fast_math_present);
-  } else if (block_name == "no_slp_vectorize") {
-    return check_slp_vectorize_disabled(Combination_CO, Combination_CO_size,
-                                        max_thread_position,
-                                        fast_math_present);
-  } else if (block_name == "slp_vectorize") {
-    return check_slp_vectorize_enabled(Combination_CO, Combination_CO_size,
-                                       max_thread_position,
-                                       fast_math_present);
-  } else if (block_name == "infinite_num") {
-    return check_infinite_num_enabled(Combination_CO, Combination_CO_size,
-                                      max_thread_position,
-                                      fast_math_present);
-  } else if (block_name == "no_infinite_num") {
-    return check_infinite_num_disabled(Combination_CO, Combination_CO_size,
-                                       max_thread_position,
-                                       fast_math_present);
-  } else if (block_name == "NAN_num") {
-    return check_NAN_num_enabled(Combination_CO, Combination_CO_size,
-                                 max_thread_position, fast_math_present);
-  } else if (block_name == "no_NAN_num") {
-    return check_NAN_num_disabled(Combination_CO, Combination_CO_size,
-                                  max_thread_position, fast_math_present);
-  } else if (block_name == "finite_math") {
-    return check_finite_math_enabled(Combination_CO, Combination_CO_size,
-                                     max_thread_position, fast_math_present);
-  } else if (block_name == "no_finite_math") {
-    return check_finite_math_disabled(Combination_CO, Combination_CO_size,
-                                      max_thread_position, fast_math_present);
-  } else if (block_name == "associative_math") {
-    return check_associative_math_enabled(Combination_CO, Combination_CO_size,
-                                          max_thread_position,
-                                          fast_math_present);
-  } else if (block_name == "no_associative_math") {
-    return check_associative_math_disabled(Combination_CO, Combination_CO_size,
-                                           max_thread_position,
-                                           fast_math_present);
-  } else if (block_name == "signed_zeros") {
-    return check_signed_zeros_enabled(Combination_CO, Combination_CO_size,
-                                      max_thread_position,
-                                      fast_math_present);
-  } else if (block_name == "no_signed_zeros") {
-    return check_signed_zeros_disabled(Combination_CO, Combination_CO_size,
-                                       max_thread_position,
-                                       fast_math_present);
-  } else if (block_name == "trapping_math") {
-    return check_trapping_math_enabled(Combination_CO, Combination_CO_size,
-                                       max_thread_position,
-                                       fast_math_present);
-  } else if (block_name == "no_trapping_math") {
-    return check_trapping_math_disabled(Combination_CO, Combination_CO_size,
-                                        max_thread_position,
-                                        fast_math_present);
-  } else {
-    WARN("BLOCK NAME '" << block_name << "' not found");
-    return 0;
-  }
-}
-
-picojson::array getblock_fromconfig() {
-  std::string str = "pwd";
-  const char *cmd = str.c_str();
-  CaptureStream capture(stdout);
-  capture.Begin();
-  system(cmd);
-  capture.End();
-  std::string wor_dir = capture.getData();
-  std::string break_dir = wor_dir.substr(0, wor_dir.find("build"));
-  std::string append_str = "catch/unit/rtc/RtcConfig.json";
-  std::string config_path = break_dir + append_str;
-  std::string returnValue = "";
-  std::ifstream json_file(config_path.c_str());
-  if (!json_file.is_open()) {
-    WARN("Error loading config.jason");
-    exit(0);
-  }
-  std::string json_str((std::istreambuf_iterator<char>(json_file)),
-                        std::istreambuf_iterator<char>());
-  picojson::value v;
-  std::string err = picojson::parse(v, json_str);
-  if (!err.empty()) {
-    WARN("empty config.jason");
-    exit(0);
-  }
-  picojson::array& blocks = v.get<picojson::array>();
-  return blocks;
-}
-
-std::string get_string_parameters(std::string para_name_to_retrieve,
-                                  std::string block_name) {
-  std::string returnValue = "";
-  picojson::array blocks = getblock_fromconfig();
-  for (picojson::value& block : blocks) {
-    picojson::object& block_obj = block.get<picojson::object>();
-    std::string blk_name = block_obj.at("block_name").get<std::string>();
-    if (blk_name == block_name) {
-      if (para_name_to_retrieve == "compiler_option") {
-        std::string compiler_opt =
-                            block_obj.at("compiler_option").get<std::string>();
-        returnValue += compiler_opt;
-      } else if (para_name_to_retrieve == "Target_Vals") {
-        std::string Target_Vals =
-                                block_obj.at("Target_Vals").get<std::string>();
-        returnValue += Target_Vals;
-      } else if (para_name_to_retrieve == "kernel_name") {
-        std::string ker_name = block_obj.at("kernel_name").get<std::string>();
-        returnValue += ker_name;
-      } else if (para_name_to_retrieve == "reverse_compiler_option") {
-        std::string reverse =
-                    block_obj.at("reverse_compiler_option").get<std::string>();
-        returnValue += reverse;
-      } else if (para_name_to_retrieve == "ready_compiler_option") {
-        std::string ready_CO =
-                      block_obj.at("ready_compiler_option").get<std::string>();
-        returnValue += ready_CO;
-      } else {
-        WARN("REQUESTED FIELD not present : " << para_name_to_retrieve);
-      }
-    } else {
-      continue;
-    }
-  }
-  return returnValue;
-}
-
-picojson::array get_array_parameters(std::string para_name_to_retrieve,
-                                     std::string block_name) {
-  std::string returnValue = "";
-  picojson::array blocks = getblock_fromconfig();
-  for (picojson::value& block : blocks) {
-    picojson::object& block_obj = block.get<picojson::object>();
-    std::string blk_name = block_obj.at("block_name").get<std::string>();
-    if (blk_name == block_name) {
-      if (para_name_to_retrieve == "Target_Vals") {
-        picojson::array& Target_Vals =
-                    block_obj.at("Target_Vals").get<picojson::array>();
-        return Target_Vals;
-      } else if (para_name_to_retrieve == "single_CO") {
-        picojson::array& single_CO =
-                         block_obj.at("single_CO").get<picojson::array>();
-        return single_CO;
-      } else if (para_name_to_retrieve == "Combi_CO") {
-        picojson::array& Combi_CO =
-                         block_obj.at("Combi_CO").get<picojson::array>();
-        return Combi_CO;
-      } else if (para_name_to_retrieve == "Input_Vals") {
-        picojson::array& Input_Vals =
-                         block_obj.at("Input_Vals").get<picojson::array>();
-        return Input_Vals;
-      } else if (para_name_to_retrieve == "Expected_Results") {
-        picojson::array& Expected =
-                     block_obj.at("Expected_Results").get<picojson::array>();
-        return Expected;
-      } else if (para_name_to_retrieve == "Expected_Results_for_no") {
-        picojson::array& Expected_for_no =
-                block_obj.at("Expected_Results_for_no").get<picojson::array>();
-        return Expected_for_no;
-      } else if (para_name_to_retrieve == "compiler_option") {
-        picojson::array& compiler_option =
-                       block_obj.at("compiler_option").get<picojson::array>();
-        return compiler_option;
-      } else if (para_name_to_retrieve == "reverse_compiler_option") {
-        picojson::array& reverse_compiler_option =
-                block_obj.at("reverse_compiler_option").get<picojson::array>();
-        return reverse_compiler_option;
-      } else if (para_name_to_retrieve == "Headers") {
-        picojson::array& Headers =
-                         block_obj.at("Headers").get<picojson::array>();
-        return Headers;
-      } else if (para_name_to_retrieve == "Src_headers") {
-        picojson::array& Src_headers =
-                         block_obj.at("Src_headers").get<picojson::array>();
-        return Src_headers;
-      } else if (para_name_to_retrieve == "depending_comp_optn") {
-        picojson::array& depending_comp_optn =
-                 block_obj.at("depending_comp_optn").get<picojson::array>();
-        return depending_comp_optn;
-      } else {
-        WARN("REQUESTED FIELD not present : " << para_name_to_retrieve);
-        return picojson::array();
-      }
-    } else {
-      continue;
-    }
-  }
-  WARN("REQUESTED BLOCK " << block_name << " is not present ");
-  return picojson::array();
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sindxl
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+This file has definition of functions for the following functinality:
+
+1) get_combi_string_vec() : Retrieve the combination string which contains
+contains the combination of block name which indicate the respective compiler
+option seperated by ':' from RtcConfig.jason file and returns them in the
+form of vectors.
+
+2) split_comb_string() : The combination of blockname which are seperated by
+':' has to split so that their respective compiler option can be retrieved
+from the json file. This functn internally calls calling_combination_function()
+for each of the combination of compiler options. This function returns a
+int value i.e the total failed cases in that combination which is obtained
+by calling_combination_function() function.
+
+3) calling_combination_function() : This function takes the combination of
+blockname as the input. The respective compiler option for that block name is
+retrieved from the json file and store the compiler options in a array.
+calling_resp_function() is called which mapps the compiler option function
+which has to be called with a set of required parameters
+(combination of compiler options is one among them). this function returns
+the status of execution ie 1 or 0 (bool).
+
+4) getblock_fromconfig() : This function is used to open the RtcConfig.json
+file and return the blocks.
+
+5) get_string_parameters() and get_array_parameters() : retrieved the
+parameters of the respective block name.
+
+*/
+
+#include <hip/hiprtc.h>
+#include <hip/hip_runtime.h>
+#include <picojson.h>
+#include <vector>
+#include <string>
+#include <fstream>
+#include <iostream>
+#include "headers/RtcUtility.h"
+#include "headers/RtcFunctions.h"
+#include "headers/RtcKernels.h"
+#include <hip_test_common.hh>
+#include "headers/printf_common.h"
+
+#pragma clang diagnostic ignored "-Wunused-but-set-variable"
+
+std::vector<std::string> get_combi_string_vec() {
+  picojson::array combi_string = get_array_parameters("Combi_CO",
+                                                      "all_compier_options");
+  std::vector<std::string> combi_string_list;
+  for (auto& indx : combi_string) {
+    combi_string_list.push_back(indx.get<std::string>());
+  }
+  return combi_string_list;
+}
+
+int split_comb_string(std::string option) {
+  int start_collon_index = option.find(':');
+  int start_index = 0;
+  std::vector<std::string> combi_block_name;
+  while (start_collon_index != std::string::npos) {
+    std::string singleoption = option.substr(start_index,
+                               start_collon_index - start_index);
+    combi_block_name.push_back(singleoption);
+    start_index = start_collon_index + 1;
+    start_collon_index = option.find(':', start_index);
+  }
+  std::string last_option = option.substr(start_index,
+                            option.length() - start_index);
+  combi_block_name.push_back(last_option);
+  return calling_combination_function(combi_block_name);
+}
+
+int calling_combination_function(std::vector<std::string> combi_vec_list) {
+  int combi_size = combi_vec_list.size();
+  int fast_math_present = -1, undef_present = 0;
+  int max_thread_position;
+  std::vector<std::string> hold_CO(combi_size, "");
+  const char** Combination_CO = new const char*[combi_size];
+  picojson::array undef_compiler_option = get_array_parameters(
+                                          "compiler_option", "undef_macro");
+  std::vector<std::string> undef_CO_vec;
+  for (auto& indx : undef_compiler_option) {
+    undef_CO_vec.push_back(indx.get<std::string>());
+  }
+  for (int i=0; i< combi_size; i++) {
+    if (combi_vec_list[i] == "max_thread") {
+      std::string ready_CO = get_string_parameters("ready_compiler_option",
+                                                    combi_vec_list[i]);
+      hold_CO[i] = ready_CO;
+      if (combi_vec_list[i] == "max_thread") {
+        max_thread_position = i;
+      }
+    } else if (combi_vec_list[i] == "header_dir") {
+      std::string retrived_CO = get_string_parameters("compiler_option",
+                                                      "header_dir");
+      std::string str = "pwd";
+      const char *cmd = str.c_str();
+      CaptureStream capture(stdout);
+      capture.Begin();
+      system(cmd);
+      capture.End();
+      std::string wor_dir = capture.getData();
+      std::string break_dir = wor_dir.substr(0, wor_dir.find("build"));
+      std::string append_str = "catch/unit/rtc/headers";
+      std::string CO = retrived_CO + " " + break_dir + append_str;
+      hold_CO[i] = CO;
+    } else if (combi_vec_list[i] == "architecture") {
+      std::string retrived_CO = get_string_parameters("compiler_option",
+                                                      "architecture");
+      hipDeviceProp_t prop;
+      HIP_CHECK(hipGetDeviceProperties(&prop, 0));
+      std::string actual_architecture = prop.gcnArchName;
+      std::string complete_CO = retrived_CO + actual_architecture;
+      hold_CO[i] = complete_CO;
+    } else if (check_positive_CO_present(combi_vec_list[i]) == 1) {
+      std::string positive_CO = get_string_parameters("compiler_option",
+                                                      combi_vec_list[i]);
+      hold_CO[i] = positive_CO;
+      if (combi_vec_list[i] == "fast_math")
+        fast_math_present = 1;
+    } else if (check_negative_CO_present(combi_vec_list[i]) == 1) {
+      std::string split_block_name = combi_vec_list[i].substr(3,
+                                               combi_vec_list[i].length() - 3);
+      std::string negative_CO = get_string_parameters(
+                                "reverse_compiler_option", split_block_name);
+      hold_CO[i] = negative_CO;
+      if (split_block_name == "fast_math")
+        fast_math_present = 0;
+    } else if ( combi_vec_list[i] == "conversion_error"
+             || combi_vec_list[i] == "conversion_no_error"
+             || combi_vec_list[i] == "conversion_no_warning"
+             || combi_vec_list[i] == "conversion_warning") {
+      picojson::array compiler_option = get_array_parameters("compiler_option",
+                                                             "error");
+      std::vector<std::string> CO_vec;
+      for (auto& indx : compiler_option) {
+        CO_vec.push_back(indx.get<std::string>());
+      }
+      if (combi_vec_list[i] == "conversion_error") {
+        hold_CO[i] = CO_vec[0];
+      } else if (combi_vec_list[i] == "conversion_no_error") {
+        hold_CO[i] = CO_vec[1];
+      } else if (combi_vec_list[i] == "conversion_warning") {
+        hold_CO[i] = CO_vec[2];
+      } else if (combi_vec_list[i] == "conversion_no_warning") {
+        hold_CO[i] = CO_vec[3];
+      }
+    } else if (combi_vec_list[i] == "off_ffp_contract"
+             || combi_vec_list[i] == "on_ffp_contract"
+             || combi_vec_list[i] == "fast_ffp_contract"
+             || combi_vec_list[i] == "pragmas_ffp_contract") {
+      picojson::array compiler_option = get_array_parameters("compiler_option",
+                                                             "ffp_contract");
+      std::vector<std::string> CO_vec;
+      for (auto& indx : compiler_option) {
+        CO_vec.push_back(indx.get<std::string>());
+      }
+      if (combi_vec_list[i] == "off_ffp_contract") {
+        hold_CO[i] = CO_vec[0];
+      } else if (combi_vec_list[i] == "on_ffp_contract") {
+        hold_CO[i] = CO_vec[1];
+      } else if (combi_vec_list[i] == "fast_ffp_contract") {
+        hold_CO[i] = CO_vec[2];
+      } else if (combi_vec_list[i] == "pragmas_ffp_contract") {
+        hold_CO[i] = CO_vec[3];
+      }
+    } else if (combi_vec_list[i] =="undef_macro") {
+      hold_CO[i] = undef_CO_vec[1].c_str();
+      undef_present = 1;
+    } else {
+      WARN("BLOCK NAME " << combi_vec_list[i] << " NOT PRESENT");
+    }
+    Combination_CO[i] = hold_CO[i].c_str();
+  }
+  int errors = 0;
+  for (int j = 0; j< combi_size; j++) {
+    std::string block_name = combi_vec_list[j].c_str();
+    if (!calling_resp_function(block_name, Combination_CO, combi_size,
+                               max_thread_position, fast_math_present)) {
+      errors++;
+    }
+    Combination_CO[j] = hold_CO[j].c_str();
+  }
+  return errors;
+}
+
+int check_positive_CO_present(std::string find_string) {
+  static std::vector<std::string> positive_CO = {"macro", "warning", "rdc",
+                                                 "denormals", "fp32_div_sqrt",
+                                                 "Rpass_inline", "fast_math",
+                                                 "slp_vectorize",
+                                                 "amdgpu_ieee",
+                                                 "unsafe_atomic",
+                                                 "infinite_num", "NAN_num",
+                                                 "slp_vectorize", "math_errno",
+                                                 "associative_math",
+                                                 "signed_zeros", "finite_math",
+                                                 "trapping_math"};
+  if (std::find(positive_CO.begin(), positive_CO.end(),
+      find_string) != positive_CO.end())
+    return 1;
+  else
+    return 0;
+}
+
+int check_negative_CO_present(std::string find_string) {
+  static std::vector<std::string> negative_CO = {"no_fast_math",
+                                                 "no_fp32_div_sqrt",
+                                                 "no_denormals",
+                                                 "no_slp_vectorize",
+                                                 "no_amdgpu_ieee",
+                                                 "no_unsafe_atomic",
+                                                 "no_infinite_num",
+                                                 "no_slp_vectorize",
+                                                 "no_NAN_num",
+                                                 "no_math_errno",
+                                                 "no_associative_math",
+                                                 "no_signed_zeros",
+                                                 "no_finite_math",
+                                                 "no_trapping_math"};
+  if (std::find(negative_CO.begin(), negative_CO.end(),
+      find_string) != negative_CO.end())
+    return 1;
+  else
+    return 0;
+}
+
+bool  calling_resp_function(const std::string block_name,
+                           const char** Combination_CO,
+                           int Combination_CO_size, int max_thread_position,
+                           int fast_math_present) {
+  if (block_name == "max_thread") {
+    return check_max_thread(Combination_CO, Combination_CO_size,
+                            max_thread_position, fast_math_present);
+  } else if (block_name == "architecture") {
+    return check_architecture(Combination_CO, Combination_CO_size,
+                              max_thread_position, fast_math_present);
+  } else if (block_name == "rdc") {
+    return check_rdc(Combination_CO, Combination_CO_size,
+                     max_thread_position, fast_math_present);
+  } else if (block_name == "denormals") {
+    return check_denormals_enabled(Combination_CO, Combination_CO_size,
+                                   max_thread_position, fast_math_present);
+  } else if (block_name == "no_denormals") {
+    return check_denormals_disabled(Combination_CO, Combination_CO_size,
+                                    max_thread_position, fast_math_present);
+  } else if (block_name == "warning") {
+    return check_warning(Combination_CO, Combination_CO_size,
+                         max_thread_position, fast_math_present);
+  } else if (block_name == "conversion_error") {
+    return check_conversionerror_enabled(Combination_CO, Combination_CO_size,
+                                         max_thread_position,
+                                         fast_math_present);
+  } else if (block_name == "conversion_no_error") {
+    return check_conversionerror_disabled(Combination_CO, Combination_CO_size,
+                                          max_thread_position,
+                                          fast_math_present);
+  } else if (block_name == "conversion_warning") {
+    return check_conversionwarning_enabled(Combination_CO, Combination_CO_size,
+                                           max_thread_position,
+                                           fast_math_present);
+  } else if (block_name == "conversion_no_warning") {
+    return check_conversionwarning_disabled(Combination_CO,
+                                            Combination_CO_size,
+                                            max_thread_position,
+                                            fast_math_present);
+  } else if (block_name == "Rpass_inline") {
+    return check_Rpass_inline(Combination_CO, Combination_CO_size,
+                              max_thread_position, fast_math_present);
+  } else if (block_name == "macro") {
+    return check_macro(Combination_CO, Combination_CO_size,
+                       max_thread_position, fast_math_present);
+  } else if (block_name == "undef_macro") {
+    return check_undef_macro(Combination_CO, Combination_CO_size,
+                             max_thread_position, fast_math_present);
+  } else if (block_name == "header_dir") {
+    return check_header_dir(Combination_CO, Combination_CO_size,
+                            max_thread_position, fast_math_present);
+  } else if (block_name == "no_fast_math") {
+    return check_fast_math_disabled(Combination_CO, Combination_CO_size,
+                                    max_thread_position, fast_math_present);
+  } else if (block_name == "fast_math") {
+    return check_fast_math_enabled(Combination_CO, Combination_CO_size,
+                                   max_thread_position, fast_math_present);
+  } else if (block_name == "off_ffp_contract") {
+    return check_ffp_contract_off(Combination_CO, Combination_CO_size,
+                                  max_thread_position, fast_math_present);
+  } else if (block_name == "on_ffp_contract") {
+    return check_ffp_contract_on(Combination_CO, Combination_CO_size,
+                                 max_thread_position, fast_math_present);
+  } else if (block_name == "fast_ffp_contract") {
+    return check_ffp_contract_fast(Combination_CO, Combination_CO_size,
+                                   max_thread_position, fast_math_present);
+  } else if (block_name == "no_unsafe_atomic") {
+    return check_unsafe_atomic_disabled(Combination_CO, Combination_CO_size,
+                                        max_thread_position,
+                                        fast_math_present);
+  } else if (block_name == "unsafe_atomic") {
+    return check_unsafe_atomic_enabled(Combination_CO, Combination_CO_size,
+                                       max_thread_position,
+                                       fast_math_present);
+  } else if (block_name == "no_slp_vectorize") {
+    return check_slp_vectorize_disabled(Combination_CO, Combination_CO_size,
+                                        max_thread_position,
+                                        fast_math_present);
+  } else if (block_name == "slp_vectorize") {
+    return check_slp_vectorize_enabled(Combination_CO, Combination_CO_size,
+                                       max_thread_position,
+                                       fast_math_present);
+  } else if (block_name == "infinite_num") {
+    return check_infinite_num_enabled(Combination_CO, Combination_CO_size,
+                                      max_thread_position,
+                                      fast_math_present);
+  } else if (block_name == "no_infinite_num") {
+    return check_infinite_num_disabled(Combination_CO, Combination_CO_size,
+                                       max_thread_position,
+                                       fast_math_present);
+  } else if (block_name == "NAN_num") {
+    return check_NAN_num_enabled(Combination_CO, Combination_CO_size,
+                                 max_thread_position, fast_math_present);
+  } else if (block_name == "no_NAN_num") {
+    return check_NAN_num_disabled(Combination_CO, Combination_CO_size,
+                                  max_thread_position, fast_math_present);
+  } else if (block_name == "finite_math") {
+    return check_finite_math_enabled(Combination_CO, Combination_CO_size,
+                                     max_thread_position, fast_math_present);
+  } else if (block_name == "no_finite_math") {
+    return check_finite_math_disabled(Combination_CO, Combination_CO_size,
+                                      max_thread_position, fast_math_present);
+  } else if (block_name == "associative_math") {
+    return check_associative_math_enabled(Combination_CO, Combination_CO_size,
+                                          max_thread_position,
+                                          fast_math_present);
+  } else if (block_name == "no_associative_math") {
+    return check_associative_math_disabled(Combination_CO, Combination_CO_size,
+                                           max_thread_position,
+                                           fast_math_present);
+  } else if (block_name == "signed_zeros") {
+    return check_signed_zeros_enabled(Combination_CO, Combination_CO_size,
+                                      max_thread_position,
+                                      fast_math_present);
+  } else if (block_name == "no_signed_zeros") {
+    return check_signed_zeros_disabled(Combination_CO, Combination_CO_size,
+                                       max_thread_position,
+                                       fast_math_present);
+  } else if (block_name == "trapping_math") {
+    return check_trapping_math_enabled(Combination_CO, Combination_CO_size,
+                                       max_thread_position,
+                                       fast_math_present);
+  } else if (block_name == "no_trapping_math") {
+    return check_trapping_math_disabled(Combination_CO, Combination_CO_size,
+                                        max_thread_position,
+                                        fast_math_present);
+  } else {
+    WARN("BLOCK NAME '" << block_name << "' not found");
+    return 0;
+  }
+}
+
+picojson::array getblock_fromconfig() {
+  std::string str = "pwd";
+  const char *cmd = str.c_str();
+  CaptureStream capture(stdout);
+  capture.Begin();
+  system(cmd);
+  capture.End();
+  std::string wor_dir = capture.getData();
+  std::string break_dir = wor_dir.substr(0, wor_dir.find("build"));
+  std::string append_str = "catch/unit/rtc/RtcConfig.json";
+  std::string config_path = break_dir + append_str;
+  std::string returnValue = "";
+  std::ifstream json_file(config_path.c_str());
+  if (!json_file.is_open()) {
+    WARN("Error loading config.jason");
+    exit(0);
+  }
+  std::string json_str((std::istreambuf_iterator<char>(json_file)),
+                        std::istreambuf_iterator<char>());
+  picojson::value v;
+  std::string err = picojson::parse(v, json_str);
+  if (!err.empty()) {
+    WARN("empty config.jason");
+    exit(0);
+  }
+  picojson::array& blocks = v.get<picojson::array>();
+  return blocks;
+}
+
+std::string get_string_parameters(std::string para_name_to_retrieve,
+                                  std::string block_name) {
+  std::string returnValue = "";
+  picojson::array blocks = getblock_fromconfig();
+  for (picojson::value& block : blocks) {
+    picojson::object& block_obj = block.get<picojson::object>();
+    std::string blk_name = block_obj.at("block_name").get<std::string>();
+    if (blk_name == block_name) {
+      if (para_name_to_retrieve == "compiler_option") {
+        std::string compiler_opt =
+                            block_obj.at("compiler_option").get<std::string>();
+        returnValue += compiler_opt;
+      } else if (para_name_to_retrieve == "Target_Vals") {
+        std::string Target_Vals =
+                                block_obj.at("Target_Vals").get<std::string>();
+        returnValue += Target_Vals;
+      } else if (para_name_to_retrieve == "kernel_name") {
+        std::string ker_name = block_obj.at("kernel_name").get<std::string>();
+        returnValue += ker_name;
+      } else if (para_name_to_retrieve == "reverse_compiler_option") {
+        std::string reverse =
+                    block_obj.at("reverse_compiler_option").get<std::string>();
+        returnValue += reverse;
+      } else if (para_name_to_retrieve == "ready_compiler_option") {
+        std::string ready_CO =
+                      block_obj.at("ready_compiler_option").get<std::string>();
+        returnValue += ready_CO;
+      } else {
+        WARN("REQUESTED FIELD not present : " << para_name_to_retrieve);
+      }
+    } else {
+      continue;
+    }
+  }
+  return returnValue;
+}
+
+picojson::array get_array_parameters(std::string para_name_to_retrieve,
+                                     std::string block_name) {
+  std::string returnValue = "";
+  picojson::array blocks = getblock_fromconfig();
+  for (picojson::value& block : blocks) {
+    picojson::object& block_obj = block.get<picojson::object>();
+    std::string blk_name = block_obj.at("block_name").get<std::string>();
+    if (blk_name == block_name) {
+      if (para_name_to_retrieve == "Target_Vals") {
+        picojson::array& Target_Vals =
+                    block_obj.at("Target_Vals").get<picojson::array>();
+        return Target_Vals;
+      } else if (para_name_to_retrieve == "single_CO") {
+        picojson::array& single_CO =
+                         block_obj.at("single_CO").get<picojson::array>();
+        return single_CO;
+      } else if (para_name_to_retrieve == "Combi_CO") {
+        picojson::array& Combi_CO =
+                         block_obj.at("Combi_CO").get<picojson::array>();
+        return Combi_CO;
+      } else if (para_name_to_retrieve == "Input_Vals") {
+        picojson::array& Input_Vals =
+                         block_obj.at("Input_Vals").get<picojson::array>();
+        return Input_Vals;
+      } else if (para_name_to_retrieve == "Expected_Results") {
+        picojson::array& Expected =
+                     block_obj.at("Expected_Results").get<picojson::array>();
+        return Expected;
+      } else if (para_name_to_retrieve == "Expected_Results_for_no") {
+        picojson::array& Expected_for_no =
+                block_obj.at("Expected_Results_for_no").get<picojson::array>();
+        return Expected_for_no;
+      } else if (para_name_to_retrieve == "compiler_option") {
+        picojson::array& compiler_option =
+                       block_obj.at("compiler_option").get<picojson::array>();
+        return compiler_option;
+      } else if (para_name_to_retrieve == "reverse_compiler_option") {
+        picojson::array& reverse_compiler_option =
+                block_obj.at("reverse_compiler_option").get<picojson::array>();
+        return reverse_compiler_option;
+      } else if (para_name_to_retrieve == "Headers") {
+        picojson::array& Headers =
+                         block_obj.at("Headers").get<picojson::array>();
+        return Headers;
+      } else if (para_name_to_retrieve == "Src_headers") {
+        picojson::array& Src_headers =
+                         block_obj.at("Src_headers").get<picojson::array>();
+        return Src_headers;
+      } else if (para_name_to_retrieve == "depending_comp_optn") {
+        picojson::array& depending_comp_optn =
+                 block_obj.at("depending_comp_optn").get<picojson::array>();
+        return depending_comp_optn;
+      } else {
+        WARN("REQUESTED FIELD not present : " << para_name_to_retrieve);
+        return picojson::array();
+      }
+    } else {
+      continue;
+    }
+  }
+  WARN("REQUESTED BLOCK " << block_name << " is not present ");
+  return picojson::array();
+}
diff --git a/projects/hip-tests/catch/unit/rtc/headers/RtcFunctions.h b/projects/hip-tests/catch/unit/rtc/headers/RtcFunctions.h
index 46f0d27810..d9bd325575 100644
--- a/projects/hip-tests/catch/unit/rtc/headers/RtcFunctions.h
+++ b/projects/hip-tests/catch/unit/rtc/headers/RtcFunctions.h
@@ -1,178 +1,178 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.� IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-The Functions defined in RtcFunctions.cpp are declared here in RtcFunctions.h.
-*/
-
-#ifndef CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
-#define CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
-#include <string>
-
-bool check_architecture(const char** Combination_CO,
-                       int Combination_CO_size, int max_thread_pos,
-                       int fast_math_present);
-
-bool check_rdc(const char** Combination_CO,
-               int Combination_CO_size, int max_thread_pos,
-               int fast_math_present);
-
-bool check_denormals_enabled(const char** Combination_CO,
-                             int Combination_CO_size, int max_thread_pos,
-                             int fast_math_present);
-
-bool check_denormals_disabled(const char** Combination_CO,
-                              int Combination_CO_size, int max_thread_pos,
-                              int fast_math_present);
-
-bool check_ffp_contract_off(const char** Combination_CO,
-                            int Combination_CO_size, int max_thread_pos,
-                            int fast_math_present);
-
-bool check_ffp_contract_on(const char** Combination_CO,
-                           int Combination_CO_size, int max_thread_pos,
-                           int fast_math_present);
-
-bool check_ffp_contract_fast(const char** Combination_CO,
-                             int Combination_CO_size, int max_thread_pos,
-                             int fast_math_present);
-
-bool check_fast_math_enabled(const char** Combination_CO,
-                             int Combination_CO_size, int max_thread_pos,
-                             int fast_math_present);
-
-bool check_fast_math_disabled(const char** Combination_CO,
-                              int Combination_CO_size, int max_thread_pos,
-                              int fast_math_present);
-
-bool check_slp_vectorize_enabled(const char** Combination_CO,
-                                 int Combination_CO_size, int max_thread_pos,
-                                 int fast_math_present);
-
-bool check_slp_vectorize_disabled(const char** Combination_CO,
-                                  int Combination_CO_size, int max_thread_pos,
-                                  int fast_math_present);
-
-bool check_macro(const char** Combination_CO,
-                 int Combination_CO_size, int max_thread_pos,
-                 int fast_math_present);
-
-bool check_undef_macro(const char** Combination_CO,
-                       int Combination_CO_size, int max_thread_pos,
-                       int fast_math_present);
-
-bool check_header_dir(const char** Combination_CO,
-                      int Combination_CO_size, int max_thread_pos,
-                      int fast_math_present);
-
-bool check_warning(const char** Combination_CO,
-                   int Combination_CO_size, int max_thread_pos,
-                   int fast_math_present);
-
-bool check_Rpass_inline(const char** Combination_CO,
-                        int Combination_CO_size, int max_thread_pos,
-                        int fast_math_present);
-
-bool check_conversionerror_enabled(const char** Combination_CO,
-                                   int Combination_CO_size, int max_thread_pos,
-                                   int fast_math_present);
-
-bool check_conversionerror_disabled(const char** Combination_CO,
-                                    int Combination_CO_size,
-                                    int max_thread_pos,
-                                    int fast_math_present);
-
-bool check_conversionwarning_enabled(const char** Combination_CO,
-                                     int Combination_CO_size,
-                                     int max_thread_pos,
-                                     int fast_math_present);
-
-bool check_conversionwarning_disabled(const char** Combination_CO,
-                                      int Combination_CO_size,
-                                      int max_thread_pos,
-                                      int fast_math_present);
-
-bool check_max_thread(const char** Combination_CO,
-                      int Combination_CO_size, int max_thread_pos,
-                      int fast_math_present);
-
-bool check_unsafe_atomic_enabled(const char** Combination_CO,
-                                 int Combination_CO_size, int max_thread_pos,
-                                 int fast_math_present);
-
-bool check_unsafe_atomic_disabled(const char** Combination_CO,
-                                  int Combination_CO_size, int max_thread_pos,
-                                  int fast_math_present);
-
-bool check_infinite_num_enabled(const char** Combination_CO,
-                                int Combination_CO_size, int max_thread_pos,
-                                int fast_math_present);
-
-bool check_infinite_num_disabled(const char** Combination_CO,
-                                 int Combination_CO_size, int max_thread_pos,
-                                 int fast_math_present);
-
-bool check_NAN_num_enabled(const char** Combination_CO,
-                           int Combination_CO_size, int max_thread_pos,
-                           int fast_math_present);
-
-bool check_NAN_num_disabled(const char** Combination_CO,
-                            int Combination_CO_size, int max_thread_pos,
-                            int fast_math_present);
-
-bool check_finite_math_enabled(const char** Combination_CO,
-                               int Combination_CO_size, int max_thread_pos,
-                               int fast_math_present);
-
-bool check_finite_math_disabled(const char** Combination_CO,
-                                int Combination_CO_size, int max_thread_pos,
-                                int fast_math_present);
-
-bool check_associative_math_enabled(const char** Combination_CO,
-                                    int Combination_CO_size,
-                                    int max_thread_pos,
-                                    int fast_math_present);
-
-bool check_associative_math_disabled(const char** Combination_CO,
-                                     int Combination_CO_size,
-                                     int max_thread_pos,
-                                     int fast_math_present);
-
-bool check_signed_zeros_enabled(const char** Combination_CO,
-                                int Combination_CO_size,
-                                int max_thread_pos,
-                                int fast_math_present);
-
-bool check_signed_zeros_disabled(const char** Combination_CO,
-                                 int Combination_CO_size, int max_thread_pos,
-                                 int fast_math_present);
-
-bool check_trapping_math_enabled(const char** Combination_CO,
-                                 int Combination_CO_size, int max_thread_pos,
-                                 int fast_math_present);
-
-bool check_trapping_math_disabled(const char** Combination_CO,
-                                  int Combination_CO_size, int max_thread_pos,
-                                  int fast_math_present);
-
-std::string checking_IR(const char* kername, const char** extra_CO_IRadded,
-                        int extra_CO_IRadded_size, const char** Combination_CO,
-                        int Combination_CO_size);
-
-#endif  // CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.� IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+The Functions defined in RtcFunctions.cpp are declared here in RtcFunctions.h.
+*/
+
+#ifndef CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
+#define CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
+#include <string>
+
+bool check_architecture(const char** Combination_CO,
+                       int Combination_CO_size, int max_thread_pos,
+                       int fast_math_present);
+
+bool check_rdc(const char** Combination_CO,
+               int Combination_CO_size, int max_thread_pos,
+               int fast_math_present);
+
+bool check_denormals_enabled(const char** Combination_CO,
+                             int Combination_CO_size, int max_thread_pos,
+                             int fast_math_present);
+
+bool check_denormals_disabled(const char** Combination_CO,
+                              int Combination_CO_size, int max_thread_pos,
+                              int fast_math_present);
+
+bool check_ffp_contract_off(const char** Combination_CO,
+                            int Combination_CO_size, int max_thread_pos,
+                            int fast_math_present);
+
+bool check_ffp_contract_on(const char** Combination_CO,
+                           int Combination_CO_size, int max_thread_pos,
+                           int fast_math_present);
+
+bool check_ffp_contract_fast(const char** Combination_CO,
+                             int Combination_CO_size, int max_thread_pos,
+                             int fast_math_present);
+
+bool check_fast_math_enabled(const char** Combination_CO,
+                             int Combination_CO_size, int max_thread_pos,
+                             int fast_math_present);
+
+bool check_fast_math_disabled(const char** Combination_CO,
+                              int Combination_CO_size, int max_thread_pos,
+                              int fast_math_present);
+
+bool check_slp_vectorize_enabled(const char** Combination_CO,
+                                 int Combination_CO_size, int max_thread_pos,
+                                 int fast_math_present);
+
+bool check_slp_vectorize_disabled(const char** Combination_CO,
+                                  int Combination_CO_size, int max_thread_pos,
+                                  int fast_math_present);
+
+bool check_macro(const char** Combination_CO,
+                 int Combination_CO_size, int max_thread_pos,
+                 int fast_math_present);
+
+bool check_undef_macro(const char** Combination_CO,
+                       int Combination_CO_size, int max_thread_pos,
+                       int fast_math_present);
+
+bool check_header_dir(const char** Combination_CO,
+                      int Combination_CO_size, int max_thread_pos,
+                      int fast_math_present);
+
+bool check_warning(const char** Combination_CO,
+                   int Combination_CO_size, int max_thread_pos,
+                   int fast_math_present);
+
+bool check_Rpass_inline(const char** Combination_CO,
+                        int Combination_CO_size, int max_thread_pos,
+                        int fast_math_present);
+
+bool check_conversionerror_enabled(const char** Combination_CO,
+                                   int Combination_CO_size, int max_thread_pos,
+                                   int fast_math_present);
+
+bool check_conversionerror_disabled(const char** Combination_CO,
+                                    int Combination_CO_size,
+                                    int max_thread_pos,
+                                    int fast_math_present);
+
+bool check_conversionwarning_enabled(const char** Combination_CO,
+                                     int Combination_CO_size,
+                                     int max_thread_pos,
+                                     int fast_math_present);
+
+bool check_conversionwarning_disabled(const char** Combination_CO,
+                                      int Combination_CO_size,
+                                      int max_thread_pos,
+                                      int fast_math_present);
+
+bool check_max_thread(const char** Combination_CO,
+                      int Combination_CO_size, int max_thread_pos,
+                      int fast_math_present);
+
+bool check_unsafe_atomic_enabled(const char** Combination_CO,
+                                 int Combination_CO_size, int max_thread_pos,
+                                 int fast_math_present);
+
+bool check_unsafe_atomic_disabled(const char** Combination_CO,
+                                  int Combination_CO_size, int max_thread_pos,
+                                  int fast_math_present);
+
+bool check_infinite_num_enabled(const char** Combination_CO,
+                                int Combination_CO_size, int max_thread_pos,
+                                int fast_math_present);
+
+bool check_infinite_num_disabled(const char** Combination_CO,
+                                 int Combination_CO_size, int max_thread_pos,
+                                 int fast_math_present);
+
+bool check_NAN_num_enabled(const char** Combination_CO,
+                           int Combination_CO_size, int max_thread_pos,
+                           int fast_math_present);
+
+bool check_NAN_num_disabled(const char** Combination_CO,
+                            int Combination_CO_size, int max_thread_pos,
+                            int fast_math_present);
+
+bool check_finite_math_enabled(const char** Combination_CO,
+                               int Combination_CO_size, int max_thread_pos,
+                               int fast_math_present);
+
+bool check_finite_math_disabled(const char** Combination_CO,
+                                int Combination_CO_size, int max_thread_pos,
+                                int fast_math_present);
+
+bool check_associative_math_enabled(const char** Combination_CO,
+                                    int Combination_CO_size,
+                                    int max_thread_pos,
+                                    int fast_math_present);
+
+bool check_associative_math_disabled(const char** Combination_CO,
+                                     int Combination_CO_size,
+                                     int max_thread_pos,
+                                     int fast_math_present);
+
+bool check_signed_zeros_enabled(const char** Combination_CO,
+                                int Combination_CO_size,
+                                int max_thread_pos,
+                                int fast_math_present);
+
+bool check_signed_zeros_disabled(const char** Combination_CO,
+                                 int Combination_CO_size, int max_thread_pos,
+                                 int fast_math_present);
+
+bool check_trapping_math_enabled(const char** Combination_CO,
+                                 int Combination_CO_size, int max_thread_pos,
+                                 int fast_math_present);
+
+bool check_trapping_math_disabled(const char** Combination_CO,
+                                  int Combination_CO_size, int max_thread_pos,
+                                  int fast_math_present);
+
+std::string checking_IR(const char* kername, const char** extra_CO_IRadded,
+                        int extra_CO_IRadded_size, const char** Combination_CO,
+                        int Combination_CO_size);
+
+#endif  // CATCH_UNIT_RTC_HEADERS_RTCFUNCTIONS_H_
diff --git a/projects/hip-tests/catch/unit/rtc/headers/RtcKernels.h b/projects/hip-tests/catch/unit/rtc/headers/RtcKernels.h
index f3ff6aac83..392f6ddb11 100644
--- a/projects/hip-tests/catch/unit/rtc/headers/RtcKernels.h
+++ b/projects/hip-tests/catch/unit/rtc/headers/RtcKernels.h
@@ -1,163 +1,163 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.� IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-RtcKernels.h contains the string's with the which includes the kernel code.
-They are utilized by the compiler option functions, defined in RtcFunctions.cpp
-*/
-
-#ifndef CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
-#define CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
-#include <hip/hiprtc.h>
-#include <hip/hip_runtime.h>
-#include <math.h>
-
-static constexpr auto max_thread_string {
-R"(
-extern "C"
-__global__ void max_thread(int* a) {
-  int BD = blockDim.x;
-  *a = BD;
-}
-)"};
-
-static constexpr auto denormals_string {
-R"(
-extern "C"
-__global__ void denormals(double* base, double* power, double* result) {
-  float denorm = powf(*base, *power);
-  if (*result == 0 || *result ==1 )
-    *result = (denorm==0) ? 0 : 1;
-  else
-    *result = powf(*base, *power);
-}
-)"};
-
-static constexpr auto warning_string {
-R"(
-extern "C"
-__global__ void warning() {
-  #warning "Just printing a WARNING message onto the terminal";
-}
-)"};
-
-static constexpr auto fp32_div_sqrt_string {
-R"(
-extern "C"
-__global__ void fp32_div_sqrt(float* result) {
-  float input = 109.6209;
-  *result = sqrt(input);
-}
-)"};
-
-static constexpr auto error_string {
-R"(
-extern "C"
-__global__ void error() {
-  unsigned int a = -1;
-  unsigned int b = +1;
-  signed int c = -1;
-  signed int d = +1;
-}
-)"};
-
-static constexpr auto macro_string {
-R"(
-extern "C"
-__global__ void macro(int *result) {
-  *result = PI;
-}
-)"};
-
-static constexpr auto undef_macro_string {
-R"(
-extern "C"
-__global__ void undef_macro() {
-  int a = Z;
-}
-)"};
-
-static constexpr auto header_dir_string {
-R"(
-#include "RtcFact.h"
-extern "C"
-__global__ void header_dir(int* a, int* val) {
-  *a = fact(*val);
-}
-)"};
-
-static constexpr auto rdc_string {
-R"(
-extern "C"
-__global__ void rdc(float* a, float* b, float* c) {
-  *c = *a * *b;
-}
-)"};
-
-static constexpr auto ffp_contract_string {
-R"(
-extern "C"
-__global__ void ffp_contract(float* a, float* b, float* c) {
-  *c = *a * *b + *c;
-}
-)"};
-
-static constexpr auto slp_vectorize_string {
-R"(
-extern "C"
-__global__ void slp_vectorize(__half2 a, __half2 x, __half2 *y) {
-  (*y).data.x = x.data.x + a.data.x;
-  (*y).data.y = x.data.y + a.data.y;
-}
-)"};
-
-static constexpr auto unsafe_atomic_string {
-R"(
-extern "C"
-__global__ void unsafe_atomic(float* a) {
-  int id = threadIdx.x + blockIdx.x * blockDim.x;
-  if (id < 1000) {
-    unsafeAtomicAdd(&a[id], 0.2f);
-  }
-}
-)"};
-
-static constexpr auto amdgpu_ieee_string {
-R"(
-extern "C"
-__global__ void amdgpu_ieee(float* a, float* b, float* c) {
-  *c = sqrt(*a / *b);
-  printf("sqrt(a * b) = %f\n", *c);
-}
-)"};
-
-static constexpr auto associative_math_string {
-R"(
-extern "C"
-__global__ void associative_math(int* check) {
-  double x = 0.1f;
-  double y = 0.2f;
-  double z = 0.3f;
-  if((x*y)*z != x*(y*z))
-    *check = 1;
-  else *check = 0;
-}
-)"};
-
-#endif  // CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.� IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+RtcKernels.h contains the string's with the which includes the kernel code.
+They are utilized by the compiler option functions, defined in RtcFunctions.cpp
+*/
+
+#ifndef CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
+#define CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
+#include <hip/hiprtc.h>
+#include <hip/hip_runtime.h>
+#include <math.h>
+
+static constexpr auto max_thread_string {
+R"(
+extern "C"
+__global__ void max_thread(int* a) {
+  int BD = blockDim.x;
+  *a = BD;
+}
+)"};
+
+static constexpr auto denormals_string {
+R"(
+extern "C"
+__global__ void denormals(double* base, double* power, double* result) {
+  float denorm = powf(*base, *power);
+  if (*result == 0 || *result ==1 )
+    *result = (denorm==0) ? 0 : 1;
+  else
+    *result = powf(*base, *power);
+}
+)"};
+
+static constexpr auto warning_string {
+R"(
+extern "C"
+__global__ void warning() {
+  #warning "Just printing a WARNING message onto the terminal";
+}
+)"};
+
+static constexpr auto fp32_div_sqrt_string {
+R"(
+extern "C"
+__global__ void fp32_div_sqrt(float* result) {
+  float input = 109.6209;
+  *result = sqrt(input);
+}
+)"};
+
+static constexpr auto error_string {
+R"(
+extern "C"
+__global__ void error() {
+  unsigned int a = -1;
+  unsigned int b = +1;
+  signed int c = -1;
+  signed int d = +1;
+}
+)"};
+
+static constexpr auto macro_string {
+R"(
+extern "C"
+__global__ void macro(int *result) {
+  *result = PI;
+}
+)"};
+
+static constexpr auto undef_macro_string {
+R"(
+extern "C"
+__global__ void undef_macro() {
+  int a = Z;
+}
+)"};
+
+static constexpr auto header_dir_string {
+R"(
+#include "RtcFact.h"
+extern "C"
+__global__ void header_dir(int* a, int* val) {
+  *a = fact(*val);
+}
+)"};
+
+static constexpr auto rdc_string {
+R"(
+extern "C"
+__global__ void rdc(float* a, float* b, float* c) {
+  *c = *a * *b;
+}
+)"};
+
+static constexpr auto ffp_contract_string {
+R"(
+extern "C"
+__global__ void ffp_contract(float* a, float* b, float* c) {
+  *c = *a * *b + *c;
+}
+)"};
+
+static constexpr auto slp_vectorize_string {
+R"(
+extern "C"
+__global__ void slp_vectorize(__half2 a, __half2 x, __half2 *y) {
+  (*y).data.x = x.data.x + a.data.x;
+  (*y).data.y = x.data.y + a.data.y;
+}
+)"};
+
+static constexpr auto unsafe_atomic_string {
+R"(
+extern "C"
+__global__ void unsafe_atomic(float* a) {
+  int id = threadIdx.x + blockIdx.x * blockDim.x;
+  if (id < 1000) {
+    unsafeAtomicAdd(&a[id], 0.2f);
+  }
+}
+)"};
+
+static constexpr auto amdgpu_ieee_string {
+R"(
+extern "C"
+__global__ void amdgpu_ieee(float* a, float* b, float* c) {
+  *c = sqrt(*a / *b);
+  printf("sqrt(a * b) = %f\n", *c);
+}
+)"};
+
+static constexpr auto associative_math_string {
+R"(
+extern "C"
+__global__ void associative_math(int* check) {
+  double x = 0.1f;
+  double y = 0.2f;
+  double z = 0.3f;
+  if((x*y)*z != x*(y*z))
+    *check = 1;
+  else *check = 0;
+}
+)"};
+
+#endif  // CATCH_UNIT_RTC_HEADERS_RTCKERNELS_H_
diff --git a/projects/hip-tests/catch/unit/rtc/headers/RtcUtility.h b/projects/hip-tests/catch/unit/rtc/headers/RtcUtility.h
index c7fdd71372..f9e1e04a89 100644
--- a/projects/hip-tests/catch/unit/rtc/headers/RtcUtility.h
+++ b/projects/hip-tests/catch/unit/rtc/headers/RtcUtility.h
@@ -1,53 +1,53 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.� IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/*
-The Functions defined in RtcUtility.cpp are declared here in RtcUtility.h.
-*/
-
-#ifndef CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
-#define CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
-#include <picojson.h>
-#include <vector>
-#include <string>
-
-std::vector<std::string> get_combi_string_vec();
-
-int split_comb_string(std::string option);
-
-int calling_combination_function(std::vector<std::string> combi_vec_list);
-
-int check_positive_CO_present(std::string find_string);
-
-int check_negative_CO_present(std::string find_string);
-
-bool calling_resp_function(const std::string block_name,
-                           const char** Combination_CO,
-                           int Combination_CO_size, int max_thread_position,
-                           int fast_math_present);
-
-picojson::array getblock_fromconfig();
-
-std::string get_string_parameters(std::string para_name_to_retrieve,
-                                  std::string block_name);
-
-picojson::array get_array_parameters(std::string para_name_to_retrieve,
-                                     std::string block_name);
-
-#endif  // CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.� IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*
+The Functions defined in RtcUtility.cpp are declared here in RtcUtility.h.
+*/
+
+#ifndef CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
+#define CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
+#include <picojson.h>
+#include <vector>
+#include <string>
+
+std::vector<std::string> get_combi_string_vec();
+
+int split_comb_string(std::string option);
+
+int calling_combination_function(std::vector<std::string> combi_vec_list);
+
+int check_positive_CO_present(std::string find_string);
+
+int check_negative_CO_present(std::string find_string);
+
+bool calling_resp_function(const std::string block_name,
+                           const char** Combination_CO,
+                           int Combination_CO_size, int max_thread_position,
+                           int fast_math_present);
+
+picojson::array getblock_fromconfig();
+
+std::string get_string_parameters(std::string para_name_to_retrieve,
+                                  std::string block_name);
+
+picojson::array get_array_parameters(std::string para_name_to_retrieve,
+                                     std::string block_name);
+
+#endif  // CATCH_UNIT_RTC_HEADERS_RTCUTILITY_H_
diff --git a/projects/hip-tests/catch/unit/synchronization/CMakeLists.txt b/projects/hip-tests/catch/unit/synchronization/CMakeLists.txt
index 0bff39f301..50018e5c43 100644
--- a/projects/hip-tests/catch/unit/synchronization/CMakeLists.txt
+++ b/projects/hip-tests/catch/unit/synchronization/CMakeLists.txt
@@ -1,25 +1,25 @@
-# Common Tests - Test independent of all platforms
-set(TEST_SRC
-    copy_coherency.cc
-)
-add_custom_target(memcpyInt.hsaco COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR}
-                  ${CMAKE_CURRENT_SOURCE_DIR}/memcpyIntDevice.cpp -o
-                  ${CMAKE_CURRENT_BINARY_DIR}/../synchronization/memcpyInt.hsaco -I
-                  ${HIP_PATH}/include -I
-                  ${CMAKE_CURRENT_SOURCE_DIR}/../../include -L
-                  ${HIP_PATH}/${CMAKE_INSTALL_LIBDIR}/../../include --rocm-path=${ROCM_PATH})
-# only for AMD
-if(HIP_PLATFORM MATCHES "amd")
-  set(AMD_SRC
-    cache_coherency_cpu_gpu.cc
-    cache_coherency_gpu_gpu.cc
-  )
-  set(TEST_SRC ${TEST_SRC} ${AMD_SRC})
-endif()
-
-hip_add_exe_to_target(NAME synchronizationTests
-                      TEST_SRC ${TEST_SRC}
-                      TEST_TARGET_NAME build_tests
-                      COMPILE_OPTIONS -std=c++14)
-add_dependencies(synchronizationTests memcpyInt.hsaco)
-
+# Common Tests - Test independent of all platforms
+set(TEST_SRC
+    copy_coherency.cc
+)
+add_custom_target(memcpyInt.hsaco COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR}
+                  ${CMAKE_CURRENT_SOURCE_DIR}/memcpyIntDevice.cpp -o
+                  ${CMAKE_CURRENT_BINARY_DIR}/../synchronization/memcpyInt.hsaco -I
+                  ${HIP_PATH}/include -I
+                  ${CMAKE_CURRENT_SOURCE_DIR}/../../include -L
+                  ${HIP_PATH}/${CMAKE_INSTALL_LIBDIR}/../../include --rocm-path=${ROCM_PATH})
+# only for AMD
+if(HIP_PLATFORM MATCHES "amd")
+  set(AMD_SRC
+    cache_coherency_cpu_gpu.cc
+    cache_coherency_gpu_gpu.cc
+  )
+  set(TEST_SRC ${TEST_SRC} ${AMD_SRC})
+endif()
+
+hip_add_exe_to_target(NAME synchronizationTests
+                      TEST_SRC ${TEST_SRC}
+                      TEST_TARGET_NAME build_tests
+                      COMPILE_OPTIONS -std=c++14)
+add_dependencies(synchronizationTests memcpyInt.hsaco)
+
diff --git a/projects/hip-tests/catch/unit/synchronization/cache_coherency_cpu_gpu.cc b/projects/hip-tests/catch/unit/synchronization/cache_coherency_cpu_gpu.cc
index c33eff82da..0d4c9c0136 100644
--- a/projects/hip-tests/catch/unit/synchronization/cache_coherency_cpu_gpu.cc
+++ b/projects/hip-tests/catch/unit/synchronization/cache_coherency_cpu_gpu.cc
@@ -1,282 +1,282 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-// Simple test for Fine Grained CPU-GPU coherency.
-
-#include <hip_test_kernels.hh>
-#include <hip_test_common.hh>
-
-typedef _Atomic(unsigned int) atomic_uint;
-
-// Helper function to spin on address until address equals value.
-// If the address holds the value of -1, abort because the other thread failed.
-__device__ int
-gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
-                                       unsigned int value) {
-  unsigned int compare;
-  bool check = false;
-  do {
-    compare = value;
-    check = __opencl_atomic_compare_exchange_strong(
-      reinterpret_cast<atomic_uint*>(address), /*expected=*/ &compare,
-       /*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
-      /*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
-    if (compare == -1)
-      return -1;
-  } while (!check);
-  return 0;
-}
-
-// This kernel requires a single block, single thread dispatch.
-__global__ void
-gpu_kernel(int *A, int *B, int *X, int *Y, size_t N,
-           unsigned int *AA1, unsigned int *AA2,
-           unsigned int *BA1, unsigned int *BA2, unsigned int *dresult) {
-  for (size_t i = 0; i < N; i++) {
-    // Store data into A, system fence, and atomically mark flag.
-    // This guarantees this global write is visible by device 1.
-    A[i] = X[i];
-    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
-                      __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
-    // Wait on device 1's global write to B.
-    if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
-      *dresult = -1;
-      break;
-    }
-
-    // Check device 1 properly stored Y into B.
-    bool stored_data_matches = (B[i] == Y[i]);
-    if (!stored_data_matches) {
-      // If the data does not match, alert other thread and abort.
-      printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
-             i, B[i], Y[i]);
-      __opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
-                    __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
-      *dresult = -1;
-    }
-    // Otherwise tell the other thread to continue.
-    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
-                    __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
-    // Wait on kernel gpu_cache1 to finish checking X is stored in A.
-    if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
-      *dresult = -1;
-      break;
-    }
-  }
-  *dresult = 0;
-}
-
-__host__ int
-cpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
-                                       unsigned int value) {
-  unsigned int compare;
-  bool check = false;
-  do {
-    compare = value;
-    check = __atomic_compare_exchange_n(
-      address, /*expected=*/ &compare, /*desired=*/ value,
-      /*weak=*/ false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
-    if (compare == -1)
-      return -1;
-  } while (!check);
-  return 0;
-}
-
-// This host thread runs only on a single CPU thread.
-__host__ void
-cpu_thread(int *A, int *B, int *X, int *Y, size_t N,
-           unsigned int *AA1, unsigned int *AA2,
-           unsigned int *BA1, unsigned int *BA2, unsigned int *hresult) {
-  for (size_t i = 0; i < N; i++) {
-    B[i] = Y[i];
-    __atomic_fetch_add(BA1, 1, __ATOMIC_RELEASE);
-    if (cpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
-      *hresult = -1;
-      break;
-    }
-
-    bool stored_data_matches = (A[i] == X[i]);
-    if (!stored_data_matches) {
-      printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
-             i, A[i], X[i]);
-      __atomic_exchange_n(BA2, -1, __ATOMIC_RELEASE);
-      *hresult = -1;
-      break;
-    }
-    __atomic_fetch_add(BA2, 1, __ATOMIC_RELEASE);
-    if (cpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
-      *hresult = -1;
-      break;
-    }
-  }
-  *hresult = 0;
-}
-
-static bool cpu_to_gpu_coherency() {
-  int *A_d, *B_d, *X_d, *Y_d;
-  int *A_res, *A_h, *B_h, *X_h, *Y_h;
-  unsigned int hresult, dresult;
-  size_t N = 1024;
-  size_t Nbytes = N * sizeof(int);
-  int numDevices = 0;
-
-  HIP_CHECK(hipGetDeviceCount(&numDevices));
-  if (numDevices < 1) {
-    HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
-    return 0;
-  }
-
-  // Skip this test if feature is not supported.
-  static int device0 = 0;
-  hipDeviceProp_t props;
-  HIP_CHECK(hipGetDeviceProperties(&props, device0));
-  if (strncmp(props.gcnArchName, "gfx90a", 6) != 0 &&
-      strncmp(props.gcnArchName, "gfx940", 6) != 0) {
-    printf("info: skipping test on devices other than gfx90a and gfx940.\n");
-    return true;
-  }
-
-  // Allocate Host Side Memory. Coherent Fine-grained Memory for array B.
-  printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
-  HIP_CHECK(hipHostMalloc(&B_h, Nbytes,
-                         (hipHostMallocCoherent | hipHostMallocMapped)));
-  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&B_d), B_h, 0));
-  X_h = reinterpret_cast<int*>(malloc(Nbytes));
-  HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
-  Y_h = reinterpret_cast<int*>(malloc(Nbytes));
-  HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
-
-  // Initialize the arrays and atomic variables.
-  for (size_t i = 0; i < N; i++) {
-    X_h[i] = 100000000 + i;
-    Y_h[i] = 300000000 + i;
-  }
-
-  // Initialize shared atomic flags between CPU and GPU.
-  unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
-  unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
-  HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
-  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
-                                    AA1_h, 0));
-  *AA1_h = 0;
-  HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
-  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
-                                    AA2_h, 0));
-  *AA2_h = 0;
-  HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
-  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
-                                    BA1_h, 0));
-  *BA1_h = 0;
-  HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
-  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
-                                    BA2_h, 0));
-  *BA2_h = 0;
-
-  // Skip the first stream, ensure stream is non-blocking.
-  hipStream_t stream[2];
-  HIP_CHECK(hipStreamCreate(&stream[0]));
-  HIP_CHECK(hipSetDevice(0));
-  HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
-
-  // Allocate Device Side Memory. Coherent Fine-grained Memory for array A.
-  printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
-  hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
-                                           Nbytes, hipDeviceMallocFinegrained);
-  REQUIRE(status == hipSuccess);
-  // SVM memory - host pointer is the same as device pointer to array A.
-  A_h = A_d;
-  HIP_CHECK(hipMalloc(&X_d, Nbytes));
-  HIP_CHECK(hipMalloc(&Y_d, Nbytes));
-
-  HIP_CHECK(hipMemcpy(X_d, X_h, Nbytes, hipMemcpyHostToDevice));
-  HIP_CHECK(hipMemcpy(Y_d, Y_h, Nbytes, hipMemcpyHostToDevice));
-
-  // Launch the GPU kernel.
-  const unsigned blocks = 1;
-  const unsigned threadsPerBlock = 1;
-  hipLaunchKernelGGL(gpu_kernel, dim3(blocks), dim3(threadsPerBlock),
-                     0, stream[1],
-                     A_d, B_d, X_d, Y_d, N,
-                     AA1_d, AA2_d, BA1_d, BA2_d, &dresult);
-  // Check if launch failed.
-  HIP_CHECK(hipGetLastError());
-  REQUIRE(dresult == 0);
-
-  // Do not sync the launched stream, instead run the cpu_thread.
-  std::thread host_thread(cpu_thread,
-                          A_h, B_h, X_h, Y_h, N,
-                          AA1_h, AA2_h, BA1_h, BA2_h, &hresult);
-  host_thread.detach();
-  REQUIRE(hresult == 0);
-  // Wait for Device side to finish.
-  HIP_CHECK(hipStreamSynchronize(stream[1]));
-
-  // Evaluate the resultant arrays A and B.
-  A_res = reinterpret_cast<int*>(malloc(Nbytes));
-  HIP_CHECK(A_res == 0 ? hipErrorOutOfMemory : hipSuccess);
-  HIP_CHECK(hipMemcpy(A_res, A_d, Nbytes, hipMemcpyDeviceToHost));
-
-  for (size_t i = 0; i < N; i++)  {
-    REQUIRE(A_res[i] == (100000000 + i));
-    REQUIRE(B_h[i] == (300000000 + i));
-  }
-
-  // Free all the device and host memory allocated.
-  HIP_CHECK(hipFree(A_d));
-  HIP_CHECK(hipFree(X_d));
-  HIP_CHECK(hipFree(Y_d));
-  HIP_CHECK(hipHostFree(AA1_h));
-  HIP_CHECK(hipHostFree(AA2_h));
-  HIP_CHECK(hipHostFree(BA1_h));
-  HIP_CHECK(hipHostFree(BA2_h));
-  HIP_CHECK(hipHostFree(B_h));
-  free(X_h);
-  free(Y_h);
-  free(A_res);
-
-  return true;
-}
-
-/**
- * Test Description
- * ------------------------
- *    - This test runs on devices where XGMI enables fine-grained communication
- * between GPUs. This performs a message passing test.
- * Array A is allocated on Device 0, and remotely on host.
- * Device 0 also increments atomic ints AA1 and AA2.
- * Array B is allocated on host, and remotely on Device 0.
- * Host also increments atomic ints BA1 and BA2.
- * Kernel will launch on Device 0, and store array X into array A.
- * Host Thread will store array Y into array B.
- * Kernel will validate that the correct values of array Y are stored in B.
- * Host Thread will validate that the correct values of array X are stored in A.
-
- * Test source
- * ------------------------
- *    - catch/unit/synchronization/cache_coherency_cpu_gpu.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.5
- *    - Test to be run only on AMD.
- */
-
-TEST_CASE("Unit_cache_coherency_cpu_gpu") {
-  bool passed = true;
-  // Coherency between CPU and GPU sharing host and device memory.
-  REQUIRE(passed == cpu_to_gpu_coherency());
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+// Simple test for Fine Grained CPU-GPU coherency.
+
+#include <hip_test_kernels.hh>
+#include <hip_test_common.hh>
+
+typedef _Atomic(unsigned int) atomic_uint;
+
+// Helper function to spin on address until address equals value.
+// If the address holds the value of -1, abort because the other thread failed.
+__device__ int
+gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
+                                       unsigned int value) {
+  unsigned int compare;
+  bool check = false;
+  do {
+    compare = value;
+    check = __opencl_atomic_compare_exchange_strong(
+      reinterpret_cast<atomic_uint*>(address), /*expected=*/ &compare,
+       /*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
+      /*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
+    if (compare == -1)
+      return -1;
+  } while (!check);
+  return 0;
+}
+
+// This kernel requires a single block, single thread dispatch.
+__global__ void
+gpu_kernel(int *A, int *B, int *X, int *Y, size_t N,
+           unsigned int *AA1, unsigned int *AA2,
+           unsigned int *BA1, unsigned int *BA2, unsigned int *dresult) {
+  for (size_t i = 0; i < N; i++) {
+    // Store data into A, system fence, and atomically mark flag.
+    // This guarantees this global write is visible by device 1.
+    A[i] = X[i];
+    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
+                      __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
+    // Wait on device 1's global write to B.
+    if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
+      *dresult = -1;
+      break;
+    }
+
+    // Check device 1 properly stored Y into B.
+    bool stored_data_matches = (B[i] == Y[i]);
+    if (!stored_data_matches) {
+      // If the data does not match, alert other thread and abort.
+      printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
+             i, B[i], Y[i]);
+      __opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
+                    __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
+      *dresult = -1;
+    }
+    // Otherwise tell the other thread to continue.
+    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
+                    __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
+    // Wait on kernel gpu_cache1 to finish checking X is stored in A.
+    if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
+      *dresult = -1;
+      break;
+    }
+  }
+  *dresult = 0;
+}
+
+__host__ int
+cpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
+                                       unsigned int value) {
+  unsigned int compare;
+  bool check = false;
+  do {
+    compare = value;
+    check = __atomic_compare_exchange_n(
+      address, /*expected=*/ &compare, /*desired=*/ value,
+      /*weak=*/ false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE);
+    if (compare == -1)
+      return -1;
+  } while (!check);
+  return 0;
+}
+
+// This host thread runs only on a single CPU thread.
+__host__ void
+cpu_thread(int *A, int *B, int *X, int *Y, size_t N,
+           unsigned int *AA1, unsigned int *AA2,
+           unsigned int *BA1, unsigned int *BA2, unsigned int *hresult) {
+  for (size_t i = 0; i < N; i++) {
+    B[i] = Y[i];
+    __atomic_fetch_add(BA1, 1, __ATOMIC_RELEASE);
+    if (cpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
+      *hresult = -1;
+      break;
+    }
+
+    bool stored_data_matches = (A[i] == X[i]);
+    if (!stored_data_matches) {
+      printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
+             i, A[i], X[i]);
+      __atomic_exchange_n(BA2, -1, __ATOMIC_RELEASE);
+      *hresult = -1;
+      break;
+    }
+    __atomic_fetch_add(BA2, 1, __ATOMIC_RELEASE);
+    if (cpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
+      *hresult = -1;
+      break;
+    }
+  }
+  *hresult = 0;
+}
+
+static bool cpu_to_gpu_coherency() {
+  int *A_d, *B_d, *X_d, *Y_d;
+  int *A_res, *A_h, *B_h, *X_h, *Y_h;
+  unsigned int hresult, dresult;
+  size_t N = 1024;
+  size_t Nbytes = N * sizeof(int);
+  int numDevices = 0;
+
+  HIP_CHECK(hipGetDeviceCount(&numDevices));
+  if (numDevices < 1) {
+    HipTest::HIP_SKIP_TEST("Skipping because devices < 1");
+    return 0;
+  }
+
+  // Skip this test if feature is not supported.
+  static int device0 = 0;
+  hipDeviceProp_t props;
+  HIP_CHECK(hipGetDeviceProperties(&props, device0));
+  if (strncmp(props.gcnArchName, "gfx90a", 6) != 0 &&
+      strncmp(props.gcnArchName, "gfx940", 6) != 0) {
+    printf("info: skipping test on devices other than gfx90a and gfx940.\n");
+    return true;
+  }
+
+  // Allocate Host Side Memory. Coherent Fine-grained Memory for array B.
+  printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
+  HIP_CHECK(hipHostMalloc(&B_h, Nbytes,
+                         (hipHostMallocCoherent | hipHostMallocMapped)));
+  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&B_d), B_h, 0));
+  X_h = reinterpret_cast<int*>(malloc(Nbytes));
+  HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
+  Y_h = reinterpret_cast<int*>(malloc(Nbytes));
+  HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
+
+  // Initialize the arrays and atomic variables.
+  for (size_t i = 0; i < N; i++) {
+    X_h[i] = 100000000 + i;
+    Y_h[i] = 300000000 + i;
+  }
+
+  // Initialize shared atomic flags between CPU and GPU.
+  unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
+  unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
+  HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
+  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
+                                    AA1_h, 0));
+  *AA1_h = 0;
+  HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
+  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
+                                    AA2_h, 0));
+  *AA2_h = 0;
+  HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
+  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
+                                    BA1_h, 0));
+  *BA1_h = 0;
+  HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
+  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
+                                    BA2_h, 0));
+  *BA2_h = 0;
+
+  // Skip the first stream, ensure stream is non-blocking.
+  hipStream_t stream[2];
+  HIP_CHECK(hipStreamCreate(&stream[0]));
+  HIP_CHECK(hipSetDevice(0));
+  HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
+
+  // Allocate Device Side Memory. Coherent Fine-grained Memory for array A.
+  printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
+  hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
+                                           Nbytes, hipDeviceMallocFinegrained);
+  REQUIRE(status == hipSuccess);
+  // SVM memory - host pointer is the same as device pointer to array A.
+  A_h = A_d;
+  HIP_CHECK(hipMalloc(&X_d, Nbytes));
+  HIP_CHECK(hipMalloc(&Y_d, Nbytes));
+
+  HIP_CHECK(hipMemcpy(X_d, X_h, Nbytes, hipMemcpyHostToDevice));
+  HIP_CHECK(hipMemcpy(Y_d, Y_h, Nbytes, hipMemcpyHostToDevice));
+
+  // Launch the GPU kernel.
+  const unsigned blocks = 1;
+  const unsigned threadsPerBlock = 1;
+  hipLaunchKernelGGL(gpu_kernel, dim3(blocks), dim3(threadsPerBlock),
+                     0, stream[1],
+                     A_d, B_d, X_d, Y_d, N,
+                     AA1_d, AA2_d, BA1_d, BA2_d, &dresult);
+  // Check if launch failed.
+  HIP_CHECK(hipGetLastError());
+  REQUIRE(dresult == 0);
+
+  // Do not sync the launched stream, instead run the cpu_thread.
+  std::thread host_thread(cpu_thread,
+                          A_h, B_h, X_h, Y_h, N,
+                          AA1_h, AA2_h, BA1_h, BA2_h, &hresult);
+  host_thread.detach();
+  REQUIRE(hresult == 0);
+  // Wait for Device side to finish.
+  HIP_CHECK(hipStreamSynchronize(stream[1]));
+
+  // Evaluate the resultant arrays A and B.
+  A_res = reinterpret_cast<int*>(malloc(Nbytes));
+  HIP_CHECK(A_res == 0 ? hipErrorOutOfMemory : hipSuccess);
+  HIP_CHECK(hipMemcpy(A_res, A_d, Nbytes, hipMemcpyDeviceToHost));
+
+  for (size_t i = 0; i < N; i++)  {
+    REQUIRE(A_res[i] == (100000000 + i));
+    REQUIRE(B_h[i] == (300000000 + i));
+  }
+
+  // Free all the device and host memory allocated.
+  HIP_CHECK(hipFree(A_d));
+  HIP_CHECK(hipFree(X_d));
+  HIP_CHECK(hipFree(Y_d));
+  HIP_CHECK(hipHostFree(AA1_h));
+  HIP_CHECK(hipHostFree(AA2_h));
+  HIP_CHECK(hipHostFree(BA1_h));
+  HIP_CHECK(hipHostFree(BA2_h));
+  HIP_CHECK(hipHostFree(B_h));
+  free(X_h);
+  free(Y_h);
+  free(A_res);
+
+  return true;
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - This test runs on devices where XGMI enables fine-grained communication
+ * between GPUs. This performs a message passing test.
+ * Array A is allocated on Device 0, and remotely on host.
+ * Device 0 also increments atomic ints AA1 and AA2.
+ * Array B is allocated on host, and remotely on Device 0.
+ * Host also increments atomic ints BA1 and BA2.
+ * Kernel will launch on Device 0, and store array X into array A.
+ * Host Thread will store array Y into array B.
+ * Kernel will validate that the correct values of array Y are stored in B.
+ * Host Thread will validate that the correct values of array X are stored in A.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/synchronization/cache_coherency_cpu_gpu.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.5
+ *    - Test to be run only on AMD.
+ */
+
+TEST_CASE("Unit_cache_coherency_cpu_gpu") {
+  bool passed = true;
+  // Coherency between CPU and GPU sharing host and device memory.
+  REQUIRE(passed == cpu_to_gpu_coherency());
+}
diff --git a/projects/hip-tests/catch/unit/synchronization/cache_coherency_gpu_gpu.cc b/projects/hip-tests/catch/unit/synchronization/cache_coherency_gpu_gpu.cc
index 3a645c2c39..42df8266ad 100644
--- a/projects/hip-tests/catch/unit/synchronization/cache_coherency_gpu_gpu.cc
+++ b/projects/hip-tests/catch/unit/synchronization/cache_coherency_gpu_gpu.cc
@@ -1,294 +1,294 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-// Simple test for Fine Grained GPU-GPU coherency.
-
-#include <hip_test_kernels.hh>
-#include <hip_test_common.hh>
-
-typedef _Atomic(unsigned int) atomic_uint;
-
-// Helper function to spin on address until address equals value.
-// If the address holds the value of -1, abort because the other thread failed.
-__device__ int
-gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
-                                       unsigned int value) {
-  unsigned int compare;
-  bool check = false;
-  do {
-    compare = value;
-    check = __opencl_atomic_compare_exchange_strong(
-      reinterpret_cast<atomic_uint*>(address), /*expected=*/ &compare,
-       /*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
-      /*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
-    if (compare == -1)
-      return -1;
-  } while (!check);
-  return 0;
-}
-
-// This kernel requires a single block, single thread dispatch.
-__global__ void
-gpu_cache0(int *A, int *B, int *X, int *Y, size_t N,
-           unsigned int *AA1, unsigned int *AA2,
-           unsigned int *BA1, unsigned int *BA2, unsigned int *cache0_result) {
-  for (size_t i = 0; i < N; i++) {
-    // Store data into A, system fence, and atomically mark flag.
-    // This guarantees this global write is visible by device 1.
-    A[i] = X[i];
-    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
-                    __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
-    // Wait on device 1's global write to B.
-    if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
-      *cache0_result = -1;
-      break;
-    }
-
-    // Check device 1 properly stored Y into B.
-    bool stored_data_matches = (B[i] == Y[i]);
-    if (!stored_data_matches) {
-      // If the data does not match, alert other thread and abort.
-      printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
-             i, B[i], Y[i]);
-      __opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
-                    __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
-      *cache0_result = -1;
-    }
-    // Otherwise tell the other thread to continue.
-    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
-                    __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
-    // Wait on kernel gpu_cache1 to finish checking X is stored in A.
-    if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
-      *cache0_result = -1;
-      break;
-    }
-  }
-  *cache0_result = 0;
-}
-
-// This kernel requires a single block, single thread dispatch.
-__global__ void
-gpu_cache1(int *A, int *B, int *X, int *Y, size_t N,
-           unsigned int *AA1, unsigned int *AA2,
-           unsigned int *BA1, unsigned int *BA2, unsigned int *cache1_result) {
-  for (size_t i = 0; i < N; i++) {
-    B[i] = Y[i];
-    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA1), 1,
-                __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
-    if (gpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
-      *cache1_result = -1;
-      break;
-    }
-
-    bool stored_data_matches = (A[i] == X[i]);
-    if (!stored_data_matches) {
-      printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
-             i, A[i], X[i]);
-      __opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(BA2), -1,
-                    __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
-      *cache1_result = -1;
-    }
-    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA2), 1,
-                    __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
-    if (gpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
-      *cache1_result = -1;
-      break;
-    }
-  }
-  *cache1_result = 0;
-}
-
-static bool gpu_to_gpu_coherency() {
-  int *A_d, *B_d, *X_d0, *X_d1, *Y_d0, *Y_d1;
-  int *A_h, *B_h, *X_h, *Y_h;
-  unsigned int cache0_result, cache1_result;
-  size_t N = 1024;
-  size_t Nbytes = N * sizeof(int);
-  int numDevices = 0;
-  int numTestDevices = 2;
-
-  HIP_CHECK(hipGetDeviceCount(&numDevices));
-  if (numDevices < numTestDevices) {
-    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
-    return 0;
-  }
-
-  // Skip this test if either device does not support this feature.
-  hipDeviceProp_t props0, props1;
-  HIP_CHECK(hipGetDeviceProperties(&props0, 0));
-  HIP_CHECK(hipGetDeviceProperties(&props1, 1));
-  if ((strncmp(props0.gcnArchName, "gfx90a", 6) != 0 ||
-       strncmp(props1.gcnArchName, "gfx90a", 6) != 0) &&
-      (strncmp(props0.gcnArchName, "gfx940", 6) != 0 ||
-       strncmp(props1.gcnArchName, "gfx940", 6) != 0)) {
-    printf("info: skipping test on devices other than gfx90a and gfx940.\n");
-    return true;
-  }
-
-  // Allocate Host Side Memory.
-  printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
-  A_h = reinterpret_cast<int*>(malloc(Nbytes));
-  HIP_CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
-  B_h = reinterpret_cast<int*>(malloc(Nbytes));
-  HIP_CHECK(B_h == 0 ? hipErrorOutOfMemory : hipSuccess);
-  X_h = reinterpret_cast<int*>(malloc(Nbytes));
-  HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
-  Y_h = reinterpret_cast<int*>(malloc(Nbytes));
-  HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
-
-  // Initialize the arrays and atomic variables.
-  for (size_t i = 0; i < N; i++) {
-    X_h[i] = 100000000 + i;
-    Y_h[i] = 300000000 + i;
-  }
-
-  // Initialize shared atomic flags on host coherent memory.
-  unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
-  unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
-  HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
-  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
-                                     AA1_h, 0));
-  *AA1_h = 0;
-  HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
-  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
-                                     AA2_h, 0));
-  *AA2_h = 0;
-  HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
-  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
-                                     BA1_h, 0));
-  *BA1_h = 0;
-  HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
-  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
-                                     BA2_h, 0));
-  *BA2_h = 0;
-
-  // Skip the first stream.
-  hipStream_t stream[3];
-  HIP_CHECK(hipStreamCreate(&stream[0]));
-
-  // Set-up Device 0.
-  HIP_CHECK(hipSetDevice(0));
-  // Enable P2P access to Device 1.
-  HIP_CHECK(hipDeviceEnablePeerAccess(1, 0));
-  HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
-  // Allocating Coherent Memory for Array A_d on Device 0.
-  printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
-  hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
-                                           Nbytes, hipDeviceMallocFinegrained);
-  REQUIRE(status == hipSuccess);
-  HIP_CHECK(hipMalloc(&X_d0, Nbytes));
-  HIP_CHECK(hipMalloc(&Y_d0, Nbytes));
-
-  // Set-up Device 1.
-  HIP_CHECK(hipSetDevice(1));
-  // Enable P2P access to Device 0.
-  HIP_CHECK(hipDeviceEnablePeerAccess(0, 0));
-  HIP_CHECK(hipStreamCreateWithFlags(&stream[2], hipStreamNonBlocking));
-  // Allocating Coherent Memory for Array B_d on Device 1.
-  printf("info: allocate device 1 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
-  status = hipExtMallocWithFlags(reinterpret_cast<void**>(&B_d),
-                                 Nbytes, hipDeviceMallocFinegrained);
-  REQUIRE(status == hipSuccess);
-  HIP_CHECK(hipMalloc(&X_d1, Nbytes));
-  HIP_CHECK(hipMalloc(&Y_d1, Nbytes));
-
-  // Transfer initialized data onto the device arrays.
-  HIP_CHECK(hipMemcpy(X_d0, X_h, Nbytes, hipMemcpyHostToDevice));
-  HIP_CHECK(hipMemcpy(X_d1, X_h, Nbytes, hipMemcpyHostToDevice));
-  HIP_CHECK(hipMemcpy(Y_d0, Y_h, Nbytes, hipMemcpyHostToDevice));
-  HIP_CHECK(hipMemcpy(Y_d1, Y_h, Nbytes, hipMemcpyHostToDevice));
-
-  // Prepare and launch the device kernels.
-  const unsigned blocks = 1;
-  const unsigned threadsPerBlock = 1;
-  HIP_CHECK(hipSetDevice(0));
-  hipLaunchKernelGGL(gpu_cache0, dim3(blocks), dim3(threadsPerBlock),
-                     0, stream[1],
-                     A_d, B_d, X_d0, Y_d0, N,
-                     AA1_d, AA2_d, BA1_d, BA2_d, &cache0_result);
-  // Check if launch failed.
-  HIP_CHECK(hipGetLastError());
-  REQUIRE(cache0_result == 0);
-  HIP_CHECK(hipSetDevice(1));
-  hipLaunchKernelGGL(gpu_cache1, dim3(blocks), dim3(threadsPerBlock),
-                     0, stream[2],
-                     A_d, B_d, X_d1, Y_d1, N,
-                     AA1_d, AA2_d, BA1_d, BA2_d, &cache1_result);
-  HIP_CHECK(hipGetLastError());
-  REQUIRE(cache1_result == 0);
-
-  // Wait for kernels on both devices.
-  HIP_CHECK(hipStreamSynchronize(stream[1]));
-  HIP_CHECK(hipStreamSynchronize(stream[2]));
-
-  // Evaluate the resultant arrays A and B.
-  HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
-  HIP_CHECK(hipMemcpy(B_h, B_d, Nbytes, hipMemcpyDeviceToHost));
-
-  for (size_t i = 0; i < N; i++)  {
-    REQUIRE(A_h[i] == (100000000 + i));
-    REQUIRE(B_h[i] == (300000000 + i));
-  }
-
-  // Free all the device and host memory allocated.
-  HIP_CHECK(hipFree(A_d));
-  HIP_CHECK(hipFree(B_d));
-  HIP_CHECK(hipFree(X_d0));
-  HIP_CHECK(hipFree(Y_d0));
-  HIP_CHECK(hipFree(X_d1));
-  HIP_CHECK(hipFree(Y_d1));
-  HIP_CHECK(hipHostFree(AA1_h));
-  HIP_CHECK(hipHostFree(AA2_h));
-  HIP_CHECK(hipHostFree(BA1_h));
-  HIP_CHECK(hipHostFree(BA2_h));
-  free(A_h);
-  free(B_h);
-  free(X_h);
-  free(Y_h);
-
-  return true;
-}
-
-/**
- * Test Description
- * ------------------------
- *    - This test runs on devices where XGMI enables fine-grained communication
- * between GPUs. This performs a message passing test.
- * Array A is allocated on Device 0, and remotely on Device 1.
- * Device 0 also increments atomic ints AA1 and AA2.
- * Array B is allocated on Device 1, and remotely on Device 0.
- * Device 1 also increments atomic ints BA1 and BA2.
- * Kernel 0 will launch on Device 0, and store array X into array A.
- * Kernel 1 will launch on Device 1, and store array Y into array B.
- * Kernel 0 will validate that the correct values of array Y are stored in B.
- * Kernel 1 will validate that the correct values of array X are stored in A.
-
- * Test source
- * ------------------------
- *    - catch/unit/synchronization/cache_coherency_gpu_gpu.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.5
- *    - Test to be run only on AMD.
- */
-
-TEST_CASE("Unit_cache_coherency_gpu_gpu") {
-  bool passed = true;
-  // Coherency between GPUs accessing local or remote FB.
-  REQUIRE(passed == gpu_to_gpu_coherency());
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+// Simple test for Fine Grained GPU-GPU coherency.
+
+#include <hip_test_kernels.hh>
+#include <hip_test_common.hh>
+
+typedef _Atomic(unsigned int) atomic_uint;
+
+// Helper function to spin on address until address equals value.
+// If the address holds the value of -1, abort because the other thread failed.
+__device__ int
+gpu_spin_loop_or_abort_on_negative_one(unsigned int* address,
+                                       unsigned int value) {
+  unsigned int compare;
+  bool check = false;
+  do {
+    compare = value;
+    check = __opencl_atomic_compare_exchange_strong(
+      reinterpret_cast<atomic_uint*>(address), /*expected=*/ &compare,
+       /*desired=*/ value, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE,
+      /*scope=*/ __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
+    if (compare == -1)
+      return -1;
+  } while (!check);
+  return 0;
+}
+
+// This kernel requires a single block, single thread dispatch.
+__global__ void
+gpu_cache0(int *A, int *B, int *X, int *Y, size_t N,
+           unsigned int *AA1, unsigned int *AA2,
+           unsigned int *BA1, unsigned int *BA2, unsigned int *cache0_result) {
+  for (size_t i = 0; i < N; i++) {
+    // Store data into A, system fence, and atomically mark flag.
+    // This guarantees this global write is visible by device 1.
+    A[i] = X[i];
+    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA1), 1,
+                    __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
+    // Wait on device 1's global write to B.
+    if (gpu_spin_loop_or_abort_on_negative_one(BA1, i+1) == -1) {
+      *cache0_result = -1;
+      break;
+    }
+
+    // Check device 1 properly stored Y into B.
+    bool stored_data_matches = (B[i] == Y[i]);
+    if (!stored_data_matches) {
+      // If the data does not match, alert other thread and abort.
+      printf("FAIL: at i=%zu, B[i]=%d, which does not match Y[i]=%d.\n",
+             i, B[i], Y[i]);
+      __opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(AA2), -1,
+                    __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
+      *cache0_result = -1;
+    }
+    // Otherwise tell the other thread to continue.
+    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(AA2), 1,
+                    __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
+    // Wait on kernel gpu_cache1 to finish checking X is stored in A.
+    if (gpu_spin_loop_or_abort_on_negative_one(BA2, i+1) == -1) {
+      *cache0_result = -1;
+      break;
+    }
+  }
+  *cache0_result = 0;
+}
+
+// This kernel requires a single block, single thread dispatch.
+__global__ void
+gpu_cache1(int *A, int *B, int *X, int *Y, size_t N,
+           unsigned int *AA1, unsigned int *AA2,
+           unsigned int *BA1, unsigned int *BA2, unsigned int *cache1_result) {
+  for (size_t i = 0; i < N; i++) {
+    B[i] = Y[i];
+    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA1), 1,
+                __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
+    if (gpu_spin_loop_or_abort_on_negative_one(AA1, i+1) == -1) {
+      *cache1_result = -1;
+      break;
+    }
+
+    bool stored_data_matches = (A[i] == X[i]);
+    if (!stored_data_matches) {
+      printf("FAIL: at i=%zu, A[i]=%d, which does not match X[i]=%d.\n",
+             i, A[i], X[i]);
+      __opencl_atomic_exchange(reinterpret_cast<atomic_uint*>(BA2), -1,
+                    __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
+      *cache1_result = -1;
+    }
+    __opencl_atomic_fetch_add(reinterpret_cast<atomic_uint*>(BA2), 1,
+                    __ATOMIC_RELEASE, __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES);
+    if (gpu_spin_loop_or_abort_on_negative_one(AA2, i+1) == -1) {
+      *cache1_result = -1;
+      break;
+    }
+  }
+  *cache1_result = 0;
+}
+
+static bool gpu_to_gpu_coherency() {
+  int *A_d, *B_d, *X_d0, *X_d1, *Y_d0, *Y_d1;
+  int *A_h, *B_h, *X_h, *Y_h;
+  unsigned int cache0_result, cache1_result;
+  size_t N = 1024;
+  size_t Nbytes = N * sizeof(int);
+  int numDevices = 0;
+  int numTestDevices = 2;
+
+  HIP_CHECK(hipGetDeviceCount(&numDevices));
+  if (numDevices < numTestDevices) {
+    HipTest::HIP_SKIP_TEST("Skipping because devices < 2");
+    return 0;
+  }
+
+  // Skip this test if either device does not support this feature.
+  hipDeviceProp_t props0, props1;
+  HIP_CHECK(hipGetDeviceProperties(&props0, 0));
+  HIP_CHECK(hipGetDeviceProperties(&props1, 1));
+  if ((strncmp(props0.gcnArchName, "gfx90a", 6) != 0 ||
+       strncmp(props1.gcnArchName, "gfx90a", 6) != 0) &&
+      (strncmp(props0.gcnArchName, "gfx940", 6) != 0 ||
+       strncmp(props1.gcnArchName, "gfx940", 6) != 0)) {
+    printf("info: skipping test on devices other than gfx90a and gfx940.\n");
+    return true;
+  }
+
+  // Allocate Host Side Memory.
+  printf("info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
+  A_h = reinterpret_cast<int*>(malloc(Nbytes));
+  HIP_CHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess);
+  B_h = reinterpret_cast<int*>(malloc(Nbytes));
+  HIP_CHECK(B_h == 0 ? hipErrorOutOfMemory : hipSuccess);
+  X_h = reinterpret_cast<int*>(malloc(Nbytes));
+  HIP_CHECK(X_h == 0 ? hipErrorOutOfMemory : hipSuccess);
+  Y_h = reinterpret_cast<int*>(malloc(Nbytes));
+  HIP_CHECK(Y_h == 0 ? hipErrorOutOfMemory : hipSuccess);
+
+  // Initialize the arrays and atomic variables.
+  for (size_t i = 0; i < N; i++) {
+    X_h[i] = 100000000 + i;
+    Y_h[i] = 300000000 + i;
+  }
+
+  // Initialize shared atomic flags on host coherent memory.
+  unsigned int *AA1_h, *AA2_h, *BA1_h, *BA2_h;
+  unsigned int *AA1_d, *AA2_d, *BA1_d, *BA2_d;
+  HIP_CHECK(hipHostMalloc(&AA1_h, sizeof(unsigned int), hipHostMallocCoherent));
+  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA1_d),
+                                     AA1_h, 0));
+  *AA1_h = 0;
+  HIP_CHECK(hipHostMalloc(&AA2_h, sizeof(unsigned int), hipHostMallocCoherent));
+  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&AA2_d),
+                                     AA2_h, 0));
+  *AA2_h = 0;
+  HIP_CHECK(hipHostMalloc(&BA1_h, sizeof(unsigned int), hipHostMallocCoherent));
+  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA1_d),
+                                     BA1_h, 0));
+  *BA1_h = 0;
+  HIP_CHECK(hipHostMalloc(&BA2_h, sizeof(unsigned int), hipHostMallocCoherent));
+  HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast<void**>(&BA2_d),
+                                     BA2_h, 0));
+  *BA2_h = 0;
+
+  // Skip the first stream.
+  hipStream_t stream[3];
+  HIP_CHECK(hipStreamCreate(&stream[0]));
+
+  // Set-up Device 0.
+  HIP_CHECK(hipSetDevice(0));
+  // Enable P2P access to Device 1.
+  HIP_CHECK(hipDeviceEnablePeerAccess(1, 0));
+  HIP_CHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking));
+  // Allocating Coherent Memory for Array A_d on Device 0.
+  printf("info: allocate device 0 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
+  hipError_t status = hipExtMallocWithFlags(reinterpret_cast<void**>(&A_d),
+                                           Nbytes, hipDeviceMallocFinegrained);
+  REQUIRE(status == hipSuccess);
+  HIP_CHECK(hipMalloc(&X_d0, Nbytes));
+  HIP_CHECK(hipMalloc(&Y_d0, Nbytes));
+
+  // Set-up Device 1.
+  HIP_CHECK(hipSetDevice(1));
+  // Enable P2P access to Device 0.
+  HIP_CHECK(hipDeviceEnablePeerAccess(0, 0));
+  HIP_CHECK(hipStreamCreateWithFlags(&stream[2], hipStreamNonBlocking));
+  // Allocating Coherent Memory for Array B_d on Device 1.
+  printf("info: allocate device 1 mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
+  status = hipExtMallocWithFlags(reinterpret_cast<void**>(&B_d),
+                                 Nbytes, hipDeviceMallocFinegrained);
+  REQUIRE(status == hipSuccess);
+  HIP_CHECK(hipMalloc(&X_d1, Nbytes));
+  HIP_CHECK(hipMalloc(&Y_d1, Nbytes));
+
+  // Transfer initialized data onto the device arrays.
+  HIP_CHECK(hipMemcpy(X_d0, X_h, Nbytes, hipMemcpyHostToDevice));
+  HIP_CHECK(hipMemcpy(X_d1, X_h, Nbytes, hipMemcpyHostToDevice));
+  HIP_CHECK(hipMemcpy(Y_d0, Y_h, Nbytes, hipMemcpyHostToDevice));
+  HIP_CHECK(hipMemcpy(Y_d1, Y_h, Nbytes, hipMemcpyHostToDevice));
+
+  // Prepare and launch the device kernels.
+  const unsigned blocks = 1;
+  const unsigned threadsPerBlock = 1;
+  HIP_CHECK(hipSetDevice(0));
+  hipLaunchKernelGGL(gpu_cache0, dim3(blocks), dim3(threadsPerBlock),
+                     0, stream[1],
+                     A_d, B_d, X_d0, Y_d0, N,
+                     AA1_d, AA2_d, BA1_d, BA2_d, &cache0_result);
+  // Check if launch failed.
+  HIP_CHECK(hipGetLastError());
+  REQUIRE(cache0_result == 0);
+  HIP_CHECK(hipSetDevice(1));
+  hipLaunchKernelGGL(gpu_cache1, dim3(blocks), dim3(threadsPerBlock),
+                     0, stream[2],
+                     A_d, B_d, X_d1, Y_d1, N,
+                     AA1_d, AA2_d, BA1_d, BA2_d, &cache1_result);
+  HIP_CHECK(hipGetLastError());
+  REQUIRE(cache1_result == 0);
+
+  // Wait for kernels on both devices.
+  HIP_CHECK(hipStreamSynchronize(stream[1]));
+  HIP_CHECK(hipStreamSynchronize(stream[2]));
+
+  // Evaluate the resultant arrays A and B.
+  HIP_CHECK(hipMemcpy(A_h, A_d, Nbytes, hipMemcpyDeviceToHost));
+  HIP_CHECK(hipMemcpy(B_h, B_d, Nbytes, hipMemcpyDeviceToHost));
+
+  for (size_t i = 0; i < N; i++)  {
+    REQUIRE(A_h[i] == (100000000 + i));
+    REQUIRE(B_h[i] == (300000000 + i));
+  }
+
+  // Free all the device and host memory allocated.
+  HIP_CHECK(hipFree(A_d));
+  HIP_CHECK(hipFree(B_d));
+  HIP_CHECK(hipFree(X_d0));
+  HIP_CHECK(hipFree(Y_d0));
+  HIP_CHECK(hipFree(X_d1));
+  HIP_CHECK(hipFree(Y_d1));
+  HIP_CHECK(hipHostFree(AA1_h));
+  HIP_CHECK(hipHostFree(AA2_h));
+  HIP_CHECK(hipHostFree(BA1_h));
+  HIP_CHECK(hipHostFree(BA2_h));
+  free(A_h);
+  free(B_h);
+  free(X_h);
+  free(Y_h);
+
+  return true;
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - This test runs on devices where XGMI enables fine-grained communication
+ * between GPUs. This performs a message passing test.
+ * Array A is allocated on Device 0, and remotely on Device 1.
+ * Device 0 also increments atomic ints AA1 and AA2.
+ * Array B is allocated on Device 1, and remotely on Device 0.
+ * Device 1 also increments atomic ints BA1 and BA2.
+ * Kernel 0 will launch on Device 0, and store array X into array A.
+ * Kernel 1 will launch on Device 1, and store array Y into array B.
+ * Kernel 0 will validate that the correct values of array Y are stored in B.
+ * Kernel 1 will validate that the correct values of array X are stored in A.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/synchronization/cache_coherency_gpu_gpu.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.5
+ *    - Test to be run only on AMD.
+ */
+
+TEST_CASE("Unit_cache_coherency_gpu_gpu") {
+  bool passed = true;
+  // Coherency between GPUs accessing local or remote FB.
+  REQUIRE(passed == gpu_to_gpu_coherency());
+}
diff --git a/projects/hip-tests/catch/unit/synchronization/copy_coherency.cc b/projects/hip-tests/catch/unit/synchronization/copy_coherency.cc
index 1e57fa6815..ed2da3e94e 100644
--- a/projects/hip-tests/catch/unit/synchronization/copy_coherency.cc
+++ b/projects/hip-tests/catch/unit/synchronization/copy_coherency.cc
@@ -1,340 +1,340 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_common.hh>
-
-unsigned threadsPerBlock = 256;
-unsigned blocksPerCU = 6;
-
-class MemcpyFunction {
- public:
-    MemcpyFunction(const char* fileName, const char* functionName) {
-      load(fileName, functionName);
-    }
-    void load(const char* fileName, const char* functionName);
-    void launch(int* dst, const int* src, size_t numElements, hipStream_t s);
-
- private:
-    hipFunction_t _function;
-    hipModule_t _module;
-};
-
-
-void MemcpyFunction::load(const char* fileName, const char* functionName) {
-    HIP_CHECK(hipModuleLoad(&_module, fileName));
-    HIP_CHECK(hipModuleGetFunction(&_function, _module, functionName));
-}
-
-void MemcpyFunction::launch(int* dst, const int* src, size_t numElements, hipStream_t s) { // NOLINT
-  struct {
-    int* _dst;
-    const int* _src;
-    size_t _numElements;
-  } args;
-
-  args._dst = dst;
-  args._src = src;
-  args._numElements = numElements;
-
-  size_t size = sizeof(args);
-  void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
-                    HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END};
-  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock,
-                    numElements);
-  HIP_CHECK(hipModuleLaunchKernel(_function, blocks, 1, 1, threadsPerBlock,
-            1, 1, 0, s, NULL,
-            reinterpret_cast<void**>(&config)));
-}
-
-bool g_warnOnFail = true;
-int g_elementSizes[] = {128 * 1000, 256 * 1000, 16 * 1000 * 1000};
-
-// Set value of array to specified 32-bit integer:
-__global__ void memsetIntKernel(int* ptr, const int val, size_t numElements) {
-  int gid = (blockIdx.x * blockDim.x + threadIdx.x);
-  int stride = blockDim.x * gridDim.x;
-  for (size_t i = gid; i < numElements; i += stride) {
-    ptr[i] = val;
-  }
-}
-
-__global__ void memcpyIntKernel(int* dst, const int* src, size_t numElements) {
-  int gid = (blockIdx.x * blockDim.x + threadIdx.x);
-  int stride = blockDim.x * gridDim.x;
-  for (size_t i = gid; i < numElements; i += stride) {
-      dst[i] = src[i];
-  }
-}
-
-// Check arrays in reverse order, to more easily detect cases where
-// the copy is "partially" done.
-void checkReverse(const int* ptr, int numElements, int expected) {
-  int mismatchCnt = 0;
-  for (int i = numElements - 1; i >= 0; i--) {
-    if (!g_warnOnFail) {
-      REQUIRE(ptr[i] == expected);
-    }
-    if (++mismatchCnt >= 10) {
-        break;
-    }
-  }
-}
-
-#define ENUM_CASE_STR(x)                                                      \
-    case x:                                                                   \
-        return #x
-
-enum CmdType { COPY, KERNEL, MODULE_KERNEL, MAX_CmdType };
-
-const char* CmdTypeStr(CmdType c) {
-    switch (c) {
-        ENUM_CASE_STR(COPY);
-        ENUM_CASE_STR(KERNEL);
-        ENUM_CASE_STR(MODULE_KERNEL);
-        default:
-            return "UNKNOWN";
-    }
-}
-
-enum SyncType {
-  NONE,
-  EVENT_QUERY,
-  EVENT_SYNC,
-  STREAM_WAIT_EVENT,
-  STREAM_QUERY,
-  STREAM_SYNC,
-  DEVICE_SYNC,
-  MAX_SyncType
-};
-
-const char* SyncTypeStr(SyncType s) {
-  switch (s) {
-    ENUM_CASE_STR(NONE);
-    ENUM_CASE_STR(EVENT_QUERY);
-    ENUM_CASE_STR(EVENT_SYNC);
-    ENUM_CASE_STR(STREAM_WAIT_EVENT);
-    ENUM_CASE_STR(STREAM_QUERY);
-    ENUM_CASE_STR(STREAM_SYNC);
-    ENUM_CASE_STR(DEVICE_SYNC);
-    default:
-      return "UNKNOWN";
-  }
-}
-
-void runCmd(CmdType cmd, int* dst, const int* src, hipStream_t s,
-             size_t numElements) {
-  switch (cmd) {
-    case COPY:
-      HIP_CHECK(
-        hipMemcpyAsync(dst, src, numElements * sizeof(int),
-                        hipMemcpyDeviceToDevice, s));
-      break;
-    case KERNEL: {
-      unsigned blocks = HipTest::setNumBlocks(blocksPerCU,
-                                 threadsPerBlock, numElements);
-      hipLaunchKernelGGL(memcpyIntKernel, dim3(blocks), dim3(threadsPerBlock),
-                          0, s, dst, src, numElements);
-    } break;
-    case MODULE_KERNEL: {
-      MemcpyFunction g_moduleMemcpy("memcpyInt.hsaco", "memcpyIntKernel");
-      g_moduleMemcpy.launch(dst, src, numElements, s);
-    } break;
-    default:
-      printf("Info:unknown cmd=%d type", cmd);
-  }
-}
-
-void resetInputs(int* Ad, int* Bd, int* Ch,
-                 size_t numElements, int expected) {
-  unsigned blocks = HipTest::setNumBlocks(blocksPerCU,
-                                          threadsPerBlock, numElements);
-  hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
-                      0, hipStream_t(0), Ad, expected, numElements);
-  // poison with bad value to ensure is overwritten correctly
-  hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
-                      0, hipStream_t(0), Bd, 0xDEADBEEF, numElements);
-  hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
-                      0, hipStream_t(0), Bd, 0xF000BA55, numElements);
-  memset(Ch, 13, numElements * sizeof(int));
-  HIP_CHECK(hipDeviceSynchronize());
-}
-
-// Intended to test proper synchronization and cache flushing
-// between CMDA and CMDB. CMD are of type CmdType. All command copy memory,
-// using either hipMemcpyAsync or kernel implementations.
-// Some form of synchronization is applied. Then cmdB copies from Bd to Cd.
-// CmdA copies from Ad to Bd, Cd is then copied to host Ch using a memory copy.
-// Correct result at the end is that Ch contains the
-// contents originally in Ad (integer 0x42)
-
-void runTestImpl(CmdType cmdAType, SyncType syncType, CmdType cmdBType,
-                 hipStream_t stream1, hipStream_t stream2, int numElements,
-                 int* Ad, int* Bd, int* Cd, int* Ch, int expected) {
-  hipEvent_t e;
-  HIP_CHECK(hipEventCreateWithFlags(&e, 0));
-
-  resetInputs(Ad, Bd, Ch, numElements, expected);
-
-  const size_t sizeElements = numElements * sizeof(int);
-  fprintf(stderr, "test: runTest with %zu bytes (%6.2f MB) cmdA=%s; sync=%s; cmdB=%s\n", // NOLINT
-          sizeElements, static_cast<double>(sizeElements / 1024.0),
-          CmdTypeStr(cmdAType), SyncTypeStr(syncType), CmdTypeStr(cmdBType));
-
-  /*if (SKIP_MODULE_KERNEL && ((cmdAType == MODULE_KERNEL) || (cmdBType == MODULE_KERNEL))) { // NOLINT
-    fprintf(stderr, "warn: skipping since test infra does not yet support modules\n"); // NOLINT
-    return;
-  }*/
-
-  // Step A:
-  runCmd(cmdAType, Bd, Ad, stream1, numElements);
-
-  // Sync in-between?
-  switch (syncType) {
-    case NONE:
-      break;
-    case EVENT_QUERY: {
-      hipError_t st = hipErrorNotReady;
-      HIP_CHECK(hipEventRecord(e, stream1));
-      do {
-          st = hipEventQuery(e);
-      } while (st == hipErrorNotReady);
-      HIP_CHECK(st);
-    } break;
-    case EVENT_SYNC:
-      HIP_CHECK(hipEventRecord(e, stream1));
-      HIP_CHECK(hipEventSynchronize(e));
-      break;
-    case STREAM_WAIT_EVENT:
-      HIP_CHECK(hipEventRecord(e, stream1));
-      HIP_CHECK(hipStreamWaitEvent(stream2, e, 0));
-      break;
-    case STREAM_QUERY: {
-      hipError_t st = hipErrorNotReady;
-      do {
-          st = hipStreamQuery(stream1);
-      } while (st == hipErrorNotReady);
-      HIP_CHECK(st);
-    } break;
-    case STREAM_SYNC:
-      HIP_CHECK(hipStreamSynchronize(stream1));
-      break;
-    case DEVICE_SYNC:
-      HIP_CHECK(hipDeviceSynchronize());
-      break;
-    default:
-      fprintf(stderr, "warning: unknown sync type=%s", SyncTypeStr(syncType));
-      return;
-  }
-  runCmd(cmdBType, Cd, Bd, stream2, numElements);
-
-  // Copy back to host, use async copy to avoid any extra synchronization
-  //  that might mask issues.
-  HIP_CHECK(hipMemcpyAsync(Ch, Cd, sizeElements, hipMemcpyDeviceToHost,
-                            stream2));
-  HIP_CHECK(hipStreamSynchronize(stream2));
-
-  checkReverse(Ch, numElements, expected);
-
-  HIP_CHECK(hipEventDestroy(e));
-}
-
-void testWrapper(size_t numElements) {
-  const size_t sizeElements = numElements * sizeof(int);
-  const int expected = 0x42;
-  int *Ad, *Bd, *Cd, *Ch;
-
-  HIP_CHECK(hipMalloc(&Ad, sizeElements));
-  HIP_CHECK(hipMalloc(&Bd, sizeElements));
-  HIP_CHECK(hipMalloc(&Cd, sizeElements));
-  HIP_CHECK(hipHostMalloc(&Ch, sizeElements));
-
-  hipStream_t stream1, stream2;
-
-  HIP_CHECK(hipStreamCreate(&stream1));
-  HIP_CHECK(hipStreamCreate(&stream2));
-  HIP_CHECK(hipDeviceSynchronize());
-
-  runTestImpl(COPY, EVENT_SYNC, KERNEL, stream1, stream2, numElements,
-              Ad, Bd, Cd, Ch, expected);
-
-  for (int cmdA = 0; cmdA < MAX_CmdType; cmdA++) {
-    for (int cmdB = 0; cmdB < MAX_CmdType; cmdB++) {
-      for (int syncMode = 0; syncMode < MAX_SyncType; syncMode++) {
-        switch (syncMode) {
-          // case NONE::
-          case EVENT_QUERY:
-          case EVENT_SYNC:
-          case STREAM_WAIT_EVENT:
-          // case STREAM_QUERY:
-          case STREAM_SYNC:
-          case DEVICE_SYNC:
-            runTestImpl(CmdType(cmdA), SyncType(syncMode), CmdType(cmdB),
-                      stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected);
-            break;
-          default:
-            break;
-        }
-      }
-    }
-  }
-
-#if 0
-  runTestImpl(COPY, STREAM_SYNC, MODULE_KERNEL, stream1, stream2,
-              numElements, Ad, Bd, Cd, Ch, expected);
-  runTestImpl(COPY, STREAM_SYNC, KERNEL, stream1, stream2, numElements,
-              Ad, Bd, Cd, Ch, expected);
-  runTestImpl(COPY, STREAM_WAIT_EVENT, MODULE_KERNEL, stream1, stream2,
-               numElements, Ad, Bd, Cd, Ch, expected);
-  runTestImpl(COPY, STREAM_WAIT_EVENT, KERNEL, stream1, stream2, numElements,
-              Ad, Bd, Cd, Ch, expected);
-#endif
-
-  HIP_CHECK(hipFree(Ad));
-  HIP_CHECK(hipFree(Bd));
-  HIP_CHECK(hipFree(Cd));
-  HIP_CHECK(hipHostFree(Ch));
-
-  HIP_CHECK(hipStreamDestroy(stream1));
-  HIP_CHECK(hipStreamDestroy(stream2));
-}
-
-/**
- * Test Description
- * ------------------------
- *    - Test cache management (fences) and synchronization between
- * kernel and copy commands. Exhaustively tests 3 command types
- * (copy, kernel, module kernel), many sync types (see SyncType), followed by
- *  another command, across a sweep of data sizes designed to stress
- * various levels of the memory hierarchy.
-
- * Test source
- * ------------------------
- *    - catch/unit/synchronization/copy_coherency.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.5
- */
-
-TEST_CASE("Unit_Copy_Coherency") {
-  for (int index = 0; index < sizeof(g_elementSizes) / sizeof(int); index++) {
-    size_t numElements = g_elementSizes[index];
-    testWrapper(numElements);
-  }
-}
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_common.hh>
+
+unsigned threadsPerBlock = 256;
+unsigned blocksPerCU = 6;
+
+class MemcpyFunction {
+ public:
+    MemcpyFunction(const char* fileName, const char* functionName) {
+      load(fileName, functionName);
+    }
+    void load(const char* fileName, const char* functionName);
+    void launch(int* dst, const int* src, size_t numElements, hipStream_t s);
+
+ private:
+    hipFunction_t _function;
+    hipModule_t _module;
+};
+
+
+void MemcpyFunction::load(const char* fileName, const char* functionName) {
+    HIP_CHECK(hipModuleLoad(&_module, fileName));
+    HIP_CHECK(hipModuleGetFunction(&_function, _module, functionName));
+}
+
+void MemcpyFunction::launch(int* dst, const int* src, size_t numElements, hipStream_t s) { // NOLINT
+  struct {
+    int* _dst;
+    const int* _src;
+    size_t _numElements;
+  } args;
+
+  args._dst = dst;
+  args._src = src;
+  args._numElements = numElements;
+
+  size_t size = sizeof(args);
+  void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
+                    HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END};
+  unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock,
+                    numElements);
+  HIP_CHECK(hipModuleLaunchKernel(_function, blocks, 1, 1, threadsPerBlock,
+            1, 1, 0, s, NULL,
+            reinterpret_cast<void**>(&config)));
+}
+
+bool g_warnOnFail = true;
+int g_elementSizes[] = {128 * 1000, 256 * 1000, 16 * 1000 * 1000};
+
+// Set value of array to specified 32-bit integer:
+__global__ void memsetIntKernel(int* ptr, const int val, size_t numElements) {
+  int gid = (blockIdx.x * blockDim.x + threadIdx.x);
+  int stride = blockDim.x * gridDim.x;
+  for (size_t i = gid; i < numElements; i += stride) {
+    ptr[i] = val;
+  }
+}
+
+__global__ void memcpyIntKernel(int* dst, const int* src, size_t numElements) {
+  int gid = (blockIdx.x * blockDim.x + threadIdx.x);
+  int stride = blockDim.x * gridDim.x;
+  for (size_t i = gid; i < numElements; i += stride) {
+      dst[i] = src[i];
+  }
+}
+
+// Check arrays in reverse order, to more easily detect cases where
+// the copy is "partially" done.
+void checkReverse(const int* ptr, int numElements, int expected) {
+  int mismatchCnt = 0;
+  for (int i = numElements - 1; i >= 0; i--) {
+    if (!g_warnOnFail) {
+      REQUIRE(ptr[i] == expected);
+    }
+    if (++mismatchCnt >= 10) {
+        break;
+    }
+  }
+}
+
+#define ENUM_CASE_STR(x)                                                      \
+    case x:                                                                   \
+        return #x
+
+enum CmdType { COPY, KERNEL, MODULE_KERNEL, MAX_CmdType };
+
+const char* CmdTypeStr(CmdType c) {
+    switch (c) {
+        ENUM_CASE_STR(COPY);
+        ENUM_CASE_STR(KERNEL);
+        ENUM_CASE_STR(MODULE_KERNEL);
+        default:
+            return "UNKNOWN";
+    }
+}
+
+enum SyncType {
+  NONE,
+  EVENT_QUERY,
+  EVENT_SYNC,
+  STREAM_WAIT_EVENT,
+  STREAM_QUERY,
+  STREAM_SYNC,
+  DEVICE_SYNC,
+  MAX_SyncType
+};
+
+const char* SyncTypeStr(SyncType s) {
+  switch (s) {
+    ENUM_CASE_STR(NONE);
+    ENUM_CASE_STR(EVENT_QUERY);
+    ENUM_CASE_STR(EVENT_SYNC);
+    ENUM_CASE_STR(STREAM_WAIT_EVENT);
+    ENUM_CASE_STR(STREAM_QUERY);
+    ENUM_CASE_STR(STREAM_SYNC);
+    ENUM_CASE_STR(DEVICE_SYNC);
+    default:
+      return "UNKNOWN";
+  }
+}
+
+void runCmd(CmdType cmd, int* dst, const int* src, hipStream_t s,
+             size_t numElements) {
+  switch (cmd) {
+    case COPY:
+      HIP_CHECK(
+        hipMemcpyAsync(dst, src, numElements * sizeof(int),
+                        hipMemcpyDeviceToDevice, s));
+      break;
+    case KERNEL: {
+      unsigned blocks = HipTest::setNumBlocks(blocksPerCU,
+                                 threadsPerBlock, numElements);
+      hipLaunchKernelGGL(memcpyIntKernel, dim3(blocks), dim3(threadsPerBlock),
+                          0, s, dst, src, numElements);
+    } break;
+    case MODULE_KERNEL: {
+      MemcpyFunction g_moduleMemcpy("memcpyInt.hsaco", "memcpyIntKernel");
+      g_moduleMemcpy.launch(dst, src, numElements, s);
+    } break;
+    default:
+      printf("Info:unknown cmd=%d type", cmd);
+  }
+}
+
+void resetInputs(int* Ad, int* Bd, int* Ch,
+                 size_t numElements, int expected) {
+  unsigned blocks = HipTest::setNumBlocks(blocksPerCU,
+                                          threadsPerBlock, numElements);
+  hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
+                      0, hipStream_t(0), Ad, expected, numElements);
+  // poison with bad value to ensure is overwritten correctly
+  hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
+                      0, hipStream_t(0), Bd, 0xDEADBEEF, numElements);
+  hipLaunchKernelGGL(memsetIntKernel, dim3(blocks), dim3(threadsPerBlock),
+                      0, hipStream_t(0), Bd, 0xF000BA55, numElements);
+  memset(Ch, 13, numElements * sizeof(int));
+  HIP_CHECK(hipDeviceSynchronize());
+}
+
+// Intended to test proper synchronization and cache flushing
+// between CMDA and CMDB. CMD are of type CmdType. All command copy memory,
+// using either hipMemcpyAsync or kernel implementations.
+// Some form of synchronization is applied. Then cmdB copies from Bd to Cd.
+// CmdA copies from Ad to Bd, Cd is then copied to host Ch using a memory copy.
+// Correct result at the end is that Ch contains the
+// contents originally in Ad (integer 0x42)
+
+void runTestImpl(CmdType cmdAType, SyncType syncType, CmdType cmdBType,
+                 hipStream_t stream1, hipStream_t stream2, int numElements,
+                 int* Ad, int* Bd, int* Cd, int* Ch, int expected) {
+  hipEvent_t e;
+  HIP_CHECK(hipEventCreateWithFlags(&e, 0));
+
+  resetInputs(Ad, Bd, Ch, numElements, expected);
+
+  const size_t sizeElements = numElements * sizeof(int);
+  fprintf(stderr, "test: runTest with %zu bytes (%6.2f MB) cmdA=%s; sync=%s; cmdB=%s\n", // NOLINT
+          sizeElements, static_cast<double>(sizeElements / 1024.0),
+          CmdTypeStr(cmdAType), SyncTypeStr(syncType), CmdTypeStr(cmdBType));
+
+  /*if (SKIP_MODULE_KERNEL && ((cmdAType == MODULE_KERNEL) || (cmdBType == MODULE_KERNEL))) { // NOLINT
+    fprintf(stderr, "warn: skipping since test infra does not yet support modules\n"); // NOLINT
+    return;
+  }*/
+
+  // Step A:
+  runCmd(cmdAType, Bd, Ad, stream1, numElements);
+
+  // Sync in-between?
+  switch (syncType) {
+    case NONE:
+      break;
+    case EVENT_QUERY: {
+      hipError_t st = hipErrorNotReady;
+      HIP_CHECK(hipEventRecord(e, stream1));
+      do {
+          st = hipEventQuery(e);
+      } while (st == hipErrorNotReady);
+      HIP_CHECK(st);
+    } break;
+    case EVENT_SYNC:
+      HIP_CHECK(hipEventRecord(e, stream1));
+      HIP_CHECK(hipEventSynchronize(e));
+      break;
+    case STREAM_WAIT_EVENT:
+      HIP_CHECK(hipEventRecord(e, stream1));
+      HIP_CHECK(hipStreamWaitEvent(stream2, e, 0));
+      break;
+    case STREAM_QUERY: {
+      hipError_t st = hipErrorNotReady;
+      do {
+          st = hipStreamQuery(stream1);
+      } while (st == hipErrorNotReady);
+      HIP_CHECK(st);
+    } break;
+    case STREAM_SYNC:
+      HIP_CHECK(hipStreamSynchronize(stream1));
+      break;
+    case DEVICE_SYNC:
+      HIP_CHECK(hipDeviceSynchronize());
+      break;
+    default:
+      fprintf(stderr, "warning: unknown sync type=%s", SyncTypeStr(syncType));
+      return;
+  }
+  runCmd(cmdBType, Cd, Bd, stream2, numElements);
+
+  // Copy back to host, use async copy to avoid any extra synchronization
+  //  that might mask issues.
+  HIP_CHECK(hipMemcpyAsync(Ch, Cd, sizeElements, hipMemcpyDeviceToHost,
+                            stream2));
+  HIP_CHECK(hipStreamSynchronize(stream2));
+
+  checkReverse(Ch, numElements, expected);
+
+  HIP_CHECK(hipEventDestroy(e));
+}
+
+void testWrapper(size_t numElements) {
+  const size_t sizeElements = numElements * sizeof(int);
+  const int expected = 0x42;
+  int *Ad, *Bd, *Cd, *Ch;
+
+  HIP_CHECK(hipMalloc(&Ad, sizeElements));
+  HIP_CHECK(hipMalloc(&Bd, sizeElements));
+  HIP_CHECK(hipMalloc(&Cd, sizeElements));
+  HIP_CHECK(hipHostMalloc(&Ch, sizeElements));
+
+  hipStream_t stream1, stream2;
+
+  HIP_CHECK(hipStreamCreate(&stream1));
+  HIP_CHECK(hipStreamCreate(&stream2));
+  HIP_CHECK(hipDeviceSynchronize());
+
+  runTestImpl(COPY, EVENT_SYNC, KERNEL, stream1, stream2, numElements,
+              Ad, Bd, Cd, Ch, expected);
+
+  for (int cmdA = 0; cmdA < MAX_CmdType; cmdA++) {
+    for (int cmdB = 0; cmdB < MAX_CmdType; cmdB++) {
+      for (int syncMode = 0; syncMode < MAX_SyncType; syncMode++) {
+        switch (syncMode) {
+          // case NONE::
+          case EVENT_QUERY:
+          case EVENT_SYNC:
+          case STREAM_WAIT_EVENT:
+          // case STREAM_QUERY:
+          case STREAM_SYNC:
+          case DEVICE_SYNC:
+            runTestImpl(CmdType(cmdA), SyncType(syncMode), CmdType(cmdB),
+                      stream1, stream2, numElements, Ad, Bd, Cd, Ch, expected);
+            break;
+          default:
+            break;
+        }
+      }
+    }
+  }
+
+#if 0
+  runTestImpl(COPY, STREAM_SYNC, MODULE_KERNEL, stream1, stream2,
+              numElements, Ad, Bd, Cd, Ch, expected);
+  runTestImpl(COPY, STREAM_SYNC, KERNEL, stream1, stream2, numElements,
+              Ad, Bd, Cd, Ch, expected);
+  runTestImpl(COPY, STREAM_WAIT_EVENT, MODULE_KERNEL, stream1, stream2,
+               numElements, Ad, Bd, Cd, Ch, expected);
+  runTestImpl(COPY, STREAM_WAIT_EVENT, KERNEL, stream1, stream2, numElements,
+              Ad, Bd, Cd, Ch, expected);
+#endif
+
+  HIP_CHECK(hipFree(Ad));
+  HIP_CHECK(hipFree(Bd));
+  HIP_CHECK(hipFree(Cd));
+  HIP_CHECK(hipHostFree(Ch));
+
+  HIP_CHECK(hipStreamDestroy(stream1));
+  HIP_CHECK(hipStreamDestroy(stream2));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Test cache management (fences) and synchronization between
+ * kernel and copy commands. Exhaustively tests 3 command types
+ * (copy, kernel, module kernel), many sync types (see SyncType), followed by
+ *  another command, across a sweep of data sizes designed to stress
+ * various levels of the memory hierarchy.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/synchronization/copy_coherency.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.5
+ */
+
+TEST_CASE("Unit_Copy_Coherency") {
+  for (int index = 0; index < sizeof(g_elementSizes) / sizeof(int); index++) {
+    size_t numElements = g_elementSizes[index];
+    testWrapper(numElements);
+  }
+}
diff --git a/projects/hip-tests/catch/unit/warp/hipShflTests.cc b/projects/hip-tests/catch/unit/warp/hipShflTests.cc
index af7faa4525..ed66571bca 100644
--- a/projects/hip-tests/catch/unit/warp/hipShflTests.cc
+++ b/projects/hip-tests/catch/unit/warp/hipShflTests.cc
@@ -1,182 +1,182 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
-#include <hip/hip_fp16.h>
-
-#define WIDTH 4
-
-#define NUM (WIDTH * WIDTH)
-
-#define THREADS_PER_BLOCK_X 4
-#define THREADS_PER_BLOCK_Y 4
-#define THREADS_PER_BLOCK_Z 1
-
-// Device (Kernel) function, it must be void
-template <typename T> __global__ void matrixTranspose(T* out, T* in, const int width) {
-  int x = blockDim.x * blockIdx.x + threadIdx.x;
-  T val = in[x];
-  for (int i = 0; i < width; i++) {
-    for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i);
-  }
-}
-
-// CPU implementation of matrix transpose
-template <typename T>
-void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) {
-  for (unsigned int j = 0; j < width; j++) {
-    for (unsigned int i = 0; i < width; i++) {
-      output[i * width + j] = input[j * width + i];
-    }
-  }
-}
-
-static void getFactor(int* fact) { *fact = 101; }
-static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
-static void getFactor(float* fact) { *fact = 2.5; }
-static void getFactor(__half* fact) { *fact = 2.5; }
-static void getFactor(double* fact) { *fact = 2.5; }
-static void getFactor(int64_t* fact) { *fact = 303; }
-static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
-
-template <typename T> int compare(T* TransposeMatrix, T* cpuTransposeMatrix) {
-  int errors = 0;
-  for (int i = 0; i < NUM; i++) {
-    if (TransposeMatrix[i] != cpuTransposeMatrix[i]) {
-      errors++;
-    }
-  }
-  return errors;
-}
-
-template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) {
-  int errors = 0;
-  for (int i = 0; i < NUM; i++) {
-    if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) {  // NOLINT
-      errors++;
-    }
-  }
-  return errors;
-}
-
-template <typename T> void init(T* Matrix) {
-  // initialize the input data
-  T factor;
-  getFactor(&factor);
-  for (int i = 0; i < NUM; i++) {
-    Matrix[i] = (T)i + factor;
-  }
-}
-
-template <> void init(__half* Matrix) {
-  // initialize the input data
-  __half factor;
-  getFactor(&factor);
-  for (int i = 0; i < NUM; i++) {
-    Matrix[i] = i + __half2float(factor);
-  }
-}
-
-template <typename T> static void runTest() {
-  T* Matrix;
-  T* TransposeMatrix;
-  T* cpuTransposeMatrix;
-
-  T* gpuMatrix;
-  T* gpuTransposeMatrix;
-
-  hipDeviceProp_t devProp;
-  HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
-
-  int errors = 0;
-
-  Matrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
-  TransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
-  cpuTransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
-
-  init(Matrix);
-
-  // allocate the memory on the device side
-  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuMatrix), NUM * sizeof(T)));
-  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuTransposeMatrix), NUM * sizeof(T)));
-
-  // Memory transfer from host to device
-  HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice));
-
-  // Lauching kernel from host
-  hipLaunchKernelGGL(matrixTranspose<T>, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y),
-                     0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);
-
-  // Memory transfer from device to host
-  HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost));
-
-  // CPU MatrixTranspose computation
-  matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
-
-  // verify the results
-  REQUIRE(errors == compare(TransposeMatrix, cpuTransposeMatrix));
-  // free the resources on device side
-  HIP_CHECK(hipFree(gpuMatrix));
-  HIP_CHECK(hipFree(gpuTransposeMatrix));
-
-  // free the resources on host side
-  free(Matrix);
-  free(TransposeMatrix);
-  free(cpuTransposeMatrix);
-}
-
-/**
- * @addtogroup __shfl __shfl
- * @{
- * @ingroup ShflTest
- * `T  __shfl(T var, int srcLane, int width=warpSize)` -
- * Contains wrap __shfl functions.
- * @}
- */
-
-/**
- * Test Description
- * ------------------------
- * - Test case to verify __shfl warp functions for different datatypes.
-
- * Test source
- * ------------------------
- *    - catch/unit/kernel/hipShflTests.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.6
- */
-
-TEST_CASE("Unit_hipShflTests") {
-  SECTION("run test for int") { runTest<int>(); }
-  SECTION("run test for float") { runTest<float>(); }
-  SECTION("run test for double") { runTest<double>(); }
-  // Test added to support half datatype.
-  SECTION("run test for __half") { runTest<__half>(); }
-  SECTION("run test for int64_t") { runTest<int64_t>(); }
-  SECTION("run test for unsigned int") { runTest<unsigned int>(); }
-  SECTION("run test for uint64_t") { runTest<uint64_t>(); }
-}
-
-/**
-* End doxygen group ShflTest.
-* @}
-*/
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+#include <hip/hip_fp16.h>
+
+#define WIDTH 4
+
+#define NUM (WIDTH * WIDTH)
+
+#define THREADS_PER_BLOCK_X 4
+#define THREADS_PER_BLOCK_Y 4
+#define THREADS_PER_BLOCK_Z 1
+
+// Device (Kernel) function, it must be void
+template <typename T> __global__ void matrixTranspose(T* out, T* in, const int width) {
+  int x = blockDim.x * blockIdx.x + threadIdx.x;
+  T val = in[x];
+  for (int i = 0; i < width; i++) {
+    for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i);
+  }
+}
+
+// CPU implementation of matrix transpose
+template <typename T>
+void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) {
+  for (unsigned int j = 0; j < width; j++) {
+    for (unsigned int i = 0; i < width; i++) {
+      output[i * width + j] = input[j * width + i];
+    }
+  }
+}
+
+static void getFactor(int* fact) { *fact = 101; }
+static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
+static void getFactor(float* fact) { *fact = 2.5; }
+static void getFactor(__half* fact) { *fact = 2.5; }
+static void getFactor(double* fact) { *fact = 2.5; }
+static void getFactor(int64_t* fact) { *fact = 303; }
+static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
+
+template <typename T> int compare(T* TransposeMatrix, T* cpuTransposeMatrix) {
+  int errors = 0;
+  for (int i = 0; i < NUM; i++) {
+    if (TransposeMatrix[i] != cpuTransposeMatrix[i]) {
+      errors++;
+    }
+  }
+  return errors;
+}
+
+template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) {
+  int errors = 0;
+  for (int i = 0; i < NUM; i++) {
+    if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) {  // NOLINT
+      errors++;
+    }
+  }
+  return errors;
+}
+
+template <typename T> void init(T* Matrix) {
+  // initialize the input data
+  T factor;
+  getFactor(&factor);
+  for (int i = 0; i < NUM; i++) {
+    Matrix[i] = (T)i + factor;
+  }
+}
+
+template <> void init(__half* Matrix) {
+  // initialize the input data
+  __half factor;
+  getFactor(&factor);
+  for (int i = 0; i < NUM; i++) {
+    Matrix[i] = i + __half2float(factor);
+  }
+}
+
+template <typename T> static void runTest() {
+  T* Matrix;
+  T* TransposeMatrix;
+  T* cpuTransposeMatrix;
+
+  T* gpuMatrix;
+  T* gpuTransposeMatrix;
+
+  hipDeviceProp_t devProp;
+  HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
+
+  int errors = 0;
+
+  Matrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
+  TransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
+  cpuTransposeMatrix = reinterpret_cast<T*>(malloc(NUM * sizeof(T)));
+
+  init(Matrix);
+
+  // allocate the memory on the device side
+  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuMatrix), NUM * sizeof(T)));
+  HIP_CHECK(hipMalloc(reinterpret_cast<void**>(&gpuTransposeMatrix), NUM * sizeof(T)));
+
+  // Memory transfer from host to device
+  HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice));
+
+  // Lauching kernel from host
+  hipLaunchKernelGGL(matrixTranspose<T>, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y),
+                     0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);
+
+  // Memory transfer from device to host
+  HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost));
+
+  // CPU MatrixTranspose computation
+  matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
+
+  // verify the results
+  REQUIRE(errors == compare(TransposeMatrix, cpuTransposeMatrix));
+  // free the resources on device side
+  HIP_CHECK(hipFree(gpuMatrix));
+  HIP_CHECK(hipFree(gpuTransposeMatrix));
+
+  // free the resources on host side
+  free(Matrix);
+  free(TransposeMatrix);
+  free(cpuTransposeMatrix);
+}
+
+/**
+ * @addtogroup __shfl __shfl
+ * @{
+ * @ingroup ShflTest
+ * `T  __shfl(T var, int srcLane, int width=warpSize)` -
+ * Contains wrap __shfl functions.
+ * @}
+ */
+
+/**
+ * Test Description
+ * ------------------------
+ * - Test case to verify __shfl warp functions for different datatypes.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/kernel/hipShflTests.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.6
+ */
+
+TEST_CASE("Unit_hipShflTests") {
+  SECTION("run test for int") { runTest<int>(); }
+  SECTION("run test for float") { runTest<float>(); }
+  SECTION("run test for double") { runTest<double>(); }
+  // Test added to support half datatype.
+  SECTION("run test for __half") { runTest<__half>(); }
+  SECTION("run test for int64_t") { runTest<int64_t>(); }
+  SECTION("run test for unsigned int") { runTest<unsigned int>(); }
+  SECTION("run test for uint64_t") { runTest<uint64_t>(); }
+}
+
+/**
+* End doxygen group ShflTest.
+* @}
+*/
diff --git a/projects/hip-tests/catch/unit/warp/hipShflUpDownTest.cc b/projects/hip-tests/catch/unit/warp/hipShflUpDownTest.cc
index a06216f03d..0a95f52810 100644
--- a/projects/hip-tests/catch/unit/warp/hipShflUpDownTest.cc
+++ b/projects/hip-tests/catch/unit/warp/hipShflUpDownTest.cc
@@ -1,241 +1,241 @@
-/*
-Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-#include <hip_test_kernels.hh>
-#include <hip_test_checkers.hh>
-#include <hip_test_common.hh>
-#include <hip/hip_fp16.h>
-
-const int size = 32;
-
-template <typename T> __global__ void shflDownSum(T* a, int size) {
-  T val = a[threadIdx.x];
-  for (int i = size / 2; i > 0; i /= 2) {
-    val += __shfl_down(val, i, size);
-  }
-  a[threadIdx.x] = val;
-}
-
-template <typename T> __global__ void shflUpSum(T* a, int size) {
-  T val = a[threadIdx.x];
-  for (int i = size / 2; i > 0; i /= 2) {
-    val += __shfl_up(val, i, size);
-  }
-  a[threadIdx.x] = val;
-}
-
-template <typename T> __global__ void shflXorSum(T* a, int size) {
-  T val = a[threadIdx.x];
-  for (int i = size / 2; i > 0; i /= 2) {
-    val += __shfl_xor(val, i, size);
-  }
-  a[threadIdx.x] = val;
-}
-
-static void getFactor(int* fact) { *fact = 101; }
-static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
-static void getFactor(float* fact) { *fact = 2.5; }
-static void getFactor(double* fact) { *fact = 2.5; }
-static void getFactor(__half* fact) { *fact = 2.5; }
-static void getFactor(int64_t* fact) { *fact = 303; }
-static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
-
-template <typename T> T sum(T* a) {
-  T cpuSum = 0;
-  T factor;
-  getFactor(&factor);
-  for (int i = 0; i < size; i++) {
-    a[i] = i + factor;
-    cpuSum += a[i];
-  }
-  return cpuSum;
-}
-
-template <> __half sum(__half* a) {
-  __half cpuSum = 0;
-  __half factor;
-  getFactor(&factor);
-  for (int i = 0; i < size; i++) {
-    a[i] = i + __half2float(factor);
-    cpuSum = __half2float(cpuSum) + __half2float(a[i]);
-  }
-  return cpuSum;
-}
-
-template <typename T> bool compare(T gpuSum, T cpuSum) {
-  if (gpuSum != cpuSum) {
-    return true;
-  }
-  return false;
-}
-
-template <> bool compare(__half gpuSum, __half cpuSum) {
-  if (__half2float(gpuSum) != __half2float(cpuSum)) {
-    return true;
-  }
-  return false;
-}
-
-template <typename T> static void runTestShflUp() {
-  const int size = 32;
-  T a[size];
-  T cpuSum = sum(a);
-  T* d_a;
-  HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
-  HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
-  hipLaunchKernelGGL(shflUpSum<T>, 1, size, 0, 0, d_a, size);
-  HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
-  REQUIRE((compare(a[size - 1], cpuSum)) == 0);
-  HIP_CHECK(hipFree(d_a));
-}
-
-template <typename T> static void runTestShflDown() {
-  T a[size];
-  T cpuSum = sum(a);
-  T* d_a;
-  HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
-  HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
-  hipLaunchKernelGGL(shflDownSum<T>, 1, size, 0, 0, d_a, size);
-  HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
-  REQUIRE((compare(a[0], cpuSum)) == 0);
-  HIP_CHECK(hipFree(d_a));
-}
-
-template <typename T> static void runTestShflXor() {
-  T a[size];
-  T cpuSum = sum(a);
-  T* d_a;
-  HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
-  HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
-  hipLaunchKernelGGL(shflXorSum<T>, 1, size, 0, 0, d_a, size);
-  HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
-  REQUIRE((compare(a[0], cpuSum)) == 0);
-  HIP_CHECK(hipFree(d_a));
-}
-
-/**
- * @addtogroup __shfl __shfl
- * @{
- * @ingroup ShflTest
- * `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` -
- * Contains warp __shfl_up function
- */
-
-/**
- * Test Description
- * ------------------------
- *    - Test case to verify __shfl_up warp functions for different datatypes.
-
- * Test source
- * ------------------------
- *    - catch/unit/kernel/hipShflUpDownTest.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.6
- *    - Gaurding this test against cuda with refernce to mentioned
- * ticket SWDEV-379177
- */
-
-TEST_CASE("Unit_runTestShfl_up") {
-  SECTION("runTestShflUp for int") { runTestShflUp<int>(); }
-  SECTION("runTestShflUp for float") { runTestShflUp<float>(); }
-  SECTION("runTestShflUp for double") { runTestShflUp<double>(); }
-  SECTION("runTestShflUp for __half") { runTestShflUp<__half>(); }
-  SECTION("runTestShflUp for int64_t") { runTestShflUp<int64_t>(); }
-  SECTION("runTestShflUp for unsigned int") { runTestShflUp<unsigned int>(); }
-  SECTION("runTestShflUp for uint64_t") { runTestShflUp<uint64_t>(); }
-}
-/**
- * End doxygen group __shfl.
- * @}
- */
-
-/**
- * @addtogroup __shfl __shfl
- * @{
- * @ingroup ShflTest
- * `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` -
- * Contains warp __shfl_down function
- */
-
-/**
- * Test Description
- * ------------------------
- *    - Test case to verify __shfl_down warp functions for different datatypes.
-
- * Test source
- * ------------------------
- *    - catch/unit/kernel/hipShflUpDownTest.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.6
- *    - Gaurding this test against cuda with refernce to mentioned
- * ticket SWDEV-379177
- */
-
-TEST_CASE("Unit_runTestShfl_Down") {
-  SECTION("runTestShflDown for int") { runTestShflDown<int>(); }
-  SECTION("runTestShflDown for float") { runTestShflDown<float>(); }
-  SECTION("runTestShflDown for double") { runTestShflDown<double>(); }
-  SECTION("runTestShflDown for __half") { runTestShflDown<__half>(); }
-  SECTION("runTestShflDown for int64_t") { runTestShflDown<int64_t>(); }
-  SECTION("runTestShflDown for unsigned int") { runTestShflDown<unsigned int>(); }
-  SECTION("runTestShflDown for uint64_t") { runTestShflDown<uint64_t>(); }
-}
-/**
- * End doxygen group __shfl.
- * @}
- */
-
-/**
- * @addtogroup __shfl __shfl
- * @{
- * @ingroup ShflTest
- * `T __shfl_xor(T var, int laneMask, int width=warpSize)` -
- * Contains warp __shfl_xor function
- */
-
-/**
- * Test Description
- * ------------------------
- *    - Test case to verify __shfl_xor warp functions for different datatypes.
-
- * Test source
- * ------------------------
- *    - catch/unit/kernel/hipShflUpDownTest.cc
- * Test requirements
- * ------------------------
- *    - HIP_VERSION >= 5.6
- *    - Gaurding this test against cuda with refernce to mentioned
- * ticket SWDEV-379177
- */
-
-TEST_CASE("Unit_runTestShfl_Xor") {
-  SECTION("runTestShflXor for int") { runTestShflXor<int>(); }
-  SECTION("runTestShflXor for float") { runTestShflXor<float>(); }
-  SECTION("runTestShflXor for double") { runTestShflXor<double>(); }
-  SECTION("runTestShflXor for __half") { runTestShflXor<__half>(); }
-  SECTION("runTestShflXor for int64_t") { runTestShflXor<int64_t>(); }
-  SECTION("runTestShflXor for unsigned int") { runTestShflXor<unsigned int>(); }
-  SECTION("runTestShflXor for uint64_t") { runTestShflXor<uint64_t>(); }
-}
-/**
- * End doxygen group __shfl.
- * @}
- */
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include <hip_test_kernels.hh>
+#include <hip_test_checkers.hh>
+#include <hip_test_common.hh>
+#include <hip/hip_fp16.h>
+
+const int size = 32;
+
+template <typename T> __global__ void shflDownSum(T* a, int size) {
+  T val = a[threadIdx.x];
+  for (int i = size / 2; i > 0; i /= 2) {
+    val += __shfl_down(val, i, size);
+  }
+  a[threadIdx.x] = val;
+}
+
+template <typename T> __global__ void shflUpSum(T* a, int size) {
+  T val = a[threadIdx.x];
+  for (int i = size / 2; i > 0; i /= 2) {
+    val += __shfl_up(val, i, size);
+  }
+  a[threadIdx.x] = val;
+}
+
+template <typename T> __global__ void shflXorSum(T* a, int size) {
+  T val = a[threadIdx.x];
+  for (int i = size / 2; i > 0; i /= 2) {
+    val += __shfl_xor(val, i, size);
+  }
+  a[threadIdx.x] = val;
+}
+
+static void getFactor(int* fact) { *fact = 101; }
+static void getFactor(unsigned int* fact) { *fact = static_cast<unsigned int>(INT32_MAX) + 1; }
+static void getFactor(float* fact) { *fact = 2.5; }
+static void getFactor(double* fact) { *fact = 2.5; }
+static void getFactor(__half* fact) { *fact = 2.5; }
+static void getFactor(int64_t* fact) { *fact = 303; }
+static void getFactor(uint64_t* fact) { *fact = static_cast<uint64_t>(__LONG_LONG_MAX__) + 1; }
+
+template <typename T> T sum(T* a) {
+  T cpuSum = 0;
+  T factor;
+  getFactor(&factor);
+  for (int i = 0; i < size; i++) {
+    a[i] = i + factor;
+    cpuSum += a[i];
+  }
+  return cpuSum;
+}
+
+template <> __half sum(__half* a) {
+  __half cpuSum = 0;
+  __half factor;
+  getFactor(&factor);
+  for (int i = 0; i < size; i++) {
+    a[i] = i + __half2float(factor);
+    cpuSum = __half2float(cpuSum) + __half2float(a[i]);
+  }
+  return cpuSum;
+}
+
+template <typename T> bool compare(T gpuSum, T cpuSum) {
+  if (gpuSum != cpuSum) {
+    return true;
+  }
+  return false;
+}
+
+template <> bool compare(__half gpuSum, __half cpuSum) {
+  if (__half2float(gpuSum) != __half2float(cpuSum)) {
+    return true;
+  }
+  return false;
+}
+
+template <typename T> static void runTestShflUp() {
+  const int size = 32;
+  T a[size];
+  T cpuSum = sum(a);
+  T* d_a;
+  HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
+  HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
+  hipLaunchKernelGGL(shflUpSum<T>, 1, size, 0, 0, d_a, size);
+  HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
+  REQUIRE((compare(a[size - 1], cpuSum)) == 0);
+  HIP_CHECK(hipFree(d_a));
+}
+
+template <typename T> static void runTestShflDown() {
+  T a[size];
+  T cpuSum = sum(a);
+  T* d_a;
+  HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
+  HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
+  hipLaunchKernelGGL(shflDownSum<T>, 1, size, 0, 0, d_a, size);
+  HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
+  REQUIRE((compare(a[0], cpuSum)) == 0);
+  HIP_CHECK(hipFree(d_a));
+}
+
+template <typename T> static void runTestShflXor() {
+  T a[size];
+  T cpuSum = sum(a);
+  T* d_a;
+  HIP_CHECK(hipMalloc(&d_a, sizeof(T) * size));
+  HIP_CHECK(hipMemcpy(d_a, &a, sizeof(T) * size, hipMemcpyDefault));
+  hipLaunchKernelGGL(shflXorSum<T>, 1, size, 0, 0, d_a, size);
+  HIP_CHECK(hipMemcpy(&a, d_a, sizeof(T) * size, hipMemcpyDefault));
+  REQUIRE((compare(a[0], cpuSum)) == 0);
+  HIP_CHECK(hipFree(d_a));
+}
+
+/**
+ * @addtogroup __shfl __shfl
+ * @{
+ * @ingroup ShflTest
+ * `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` -
+ * Contains warp __shfl_up function
+ */
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Test case to verify __shfl_up warp functions for different datatypes.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/kernel/hipShflUpDownTest.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.6
+ *    - Gaurding this test against cuda with refernce to mentioned
+ * ticket SWDEV-379177
+ */
+
+TEST_CASE("Unit_runTestShfl_up") {
+  SECTION("runTestShflUp for int") { runTestShflUp<int>(); }
+  SECTION("runTestShflUp for float") { runTestShflUp<float>(); }
+  SECTION("runTestShflUp for double") { runTestShflUp<double>(); }
+  SECTION("runTestShflUp for __half") { runTestShflUp<__half>(); }
+  SECTION("runTestShflUp for int64_t") { runTestShflUp<int64_t>(); }
+  SECTION("runTestShflUp for unsigned int") { runTestShflUp<unsigned int>(); }
+  SECTION("runTestShflUp for uint64_t") { runTestShflUp<uint64_t>(); }
+}
+/**
+ * End doxygen group __shfl.
+ * @}
+ */
+
+/**
+ * @addtogroup __shfl __shfl
+ * @{
+ * @ingroup ShflTest
+ * `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` -
+ * Contains warp __shfl_down function
+ */
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Test case to verify __shfl_down warp functions for different datatypes.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/kernel/hipShflUpDownTest.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.6
+ *    - Gaurding this test against cuda with refernce to mentioned
+ * ticket SWDEV-379177
+ */
+
+TEST_CASE("Unit_runTestShfl_Down") {
+  SECTION("runTestShflDown for int") { runTestShflDown<int>(); }
+  SECTION("runTestShflDown for float") { runTestShflDown<float>(); }
+  SECTION("runTestShflDown for double") { runTestShflDown<double>(); }
+  SECTION("runTestShflDown for __half") { runTestShflDown<__half>(); }
+  SECTION("runTestShflDown for int64_t") { runTestShflDown<int64_t>(); }
+  SECTION("runTestShflDown for unsigned int") { runTestShflDown<unsigned int>(); }
+  SECTION("runTestShflDown for uint64_t") { runTestShflDown<uint64_t>(); }
+}
+/**
+ * End doxygen group __shfl.
+ * @}
+ */
+
+/**
+ * @addtogroup __shfl __shfl
+ * @{
+ * @ingroup ShflTest
+ * `T __shfl_xor(T var, int laneMask, int width=warpSize)` -
+ * Contains warp __shfl_xor function
+ */
+
+/**
+ * Test Description
+ * ------------------------
+ *    - Test case to verify __shfl_xor warp functions for different datatypes.
+
+ * Test source
+ * ------------------------
+ *    - catch/unit/kernel/hipShflUpDownTest.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.6
+ *    - Gaurding this test against cuda with refernce to mentioned
+ * ticket SWDEV-379177
+ */
+
+TEST_CASE("Unit_runTestShfl_Xor") {
+  SECTION("runTestShflXor for int") { runTestShflXor<int>(); }
+  SECTION("runTestShflXor for float") { runTestShflXor<float>(); }
+  SECTION("runTestShflXor for double") { runTestShflXor<double>(); }
+  SECTION("runTestShflXor for __half") { runTestShflXor<__half>(); }
+  SECTION("runTestShflXor for int64_t") { runTestShflXor<int64_t>(); }
+  SECTION("runTestShflXor for unsigned int") { runTestShflXor<unsigned int>(); }
+  SECTION("runTestShflXor for uint64_t") { runTestShflXor<uint64_t>(); }
+}
+/**
+ * End doxygen group __shfl.
+ * @}
+ */
diff --git a/projects/hip-tests/perftests/memory/hipPerfMemset.cpp b/projects/hip-tests/perftests/memory/hipPerfMemset.cpp
index a2db4c690a..2df0c9727b 100644
--- a/projects/hip-tests/perftests/memory/hipPerfMemset.cpp
+++ b/projects/hip-tests/perftests/memory/hipPerfMemset.cpp
@@ -1,437 +1,437 @@
-/*
- Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
- */
-
-/* HIT_START
- * BUILD: %t %s ../../src/test_common.cpp
- * TEST: %t
- * HIT_END
- */
-
-#include "test_common.h"
-#include <iostream>
-#include <chrono>
-
-static unsigned int sizeList[] = {
-  256, 512, 1024, 2048, 4096, 8192,
-};
-
-static unsigned int eleNumList[] = {
-    0x100, 0x400, 0x1000, 0x4000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000,
-    0x200000, 0x400000, 0x800000, 0x1000000
-};
-
-typedef struct _dataType {
-char memsetval = 0x42;
-char memsetD8val = 0xDE;
-int16_t memsetD16val = 0xDEAD;
-int memsetD32val = 0xDEADBEEF;
-}dataType;
-
-#define NUM_ITER 1000
-
-enum MemsetType {
-  hipMemsetTypeDefault,
-  hipMemsetTypeD8,
-  hipMemsetTypeD16,
-  hipMemsetTypeD32,
-  hipMemsetTypeMax
-
-};
-
-using namespace std;
-
-class hipPerfMemset {
-  private:
-    uint64_t     bufSize_;
-    unsigned int num_elements_;
-    unsigned int testNumEle_;
-    unsigned int _numSubTests = 0;
-    unsigned int _numSubTests2D = 0;
-    unsigned int _numSubTests3D = 0;
-    unsigned int num_sizes_ =0;
-
-  public:
-    hipPerfMemset() {
-    num_elements_ = sizeof(eleNumList) / sizeof(unsigned int);
-    _numSubTests = num_elements_ * hipMemsetTypeMax;
-
-    num_sizes_ = sizeof(sizeList) / sizeof(unsigned int);
-    _numSubTests2D = num_sizes_;
-    _numSubTests3D = _numSubTests2D;
-    };
-
-    ~hipPerfMemset() {};
-
-    void open(int deviceID);
-
-    template<typename T>
-    void run1D(unsigned int test, T memsetval, enum MemsetType type, bool async);
-
-    template<typename T>
-    void run2D(unsigned int test, T memsetval, enum MemsetType type, bool async);
-
-    template<typename T>
-    void run3D(unsigned int test, T memsetval, enum MemsetType type, bool async);
-
-    uint getNumTests() {
-      return _numSubTests;
-    }
-
-    uint getNumTests2D() {
-      return _numSubTests2D;
-    }
-    uint getNumTests3D() {
-      return _numSubTests3D;
-    }
-};
-
-
-void hipPerfMemset::open(int deviceId) {
-  int nGpu = 0;
-  HIPCHECK(hipGetDeviceCount(&nGpu));
-  if (nGpu < 1) {
-    failed("No GPU!");
-  }
-
-  HIPCHECK(hipSetDevice(deviceId));
-  hipDeviceProp_t props = {0};
-  HIPCHECK(hipGetDeviceProperties(&props, deviceId));
-  std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
-            << " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId
-            << std::endl;
-}
-
-template<typename T>
-void hipPerfMemset::run1D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
-
-  T * A_h;
-  T * A_d;
-
-  testNumEle_ = eleNumList[test % num_elements_];
-
-  bufSize_ = testNumEle_ * sizeof(uint32_t);
-
-  HIPCHECK(hipMalloc(&A_d, bufSize_));
-
-  A_h = reinterpret_cast<T*> (malloc(bufSize_));
-
-  hipStream_t stream;
-  HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
-
-  // Warm-up
-  if (async) {
-    HIPCHECK(hipMemsetAsync((void *)A_d, memsetval, bufSize_, stream));
-    HIPCHECK(hipStreamSynchronize(stream));
-  } else {
-    HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
-    HIPCHECK(hipDeviceSynchronize());
-  }
-
-  auto start = chrono::high_resolution_clock::now();
-  for (uint i = 0; i < NUM_ITER; i++) {
-    if (type == hipMemsetTypeDefault && !async) {
-      HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
-    }
-    else if (type == hipMemsetTypeDefault && async) {
-      HIPCHECK(hipMemsetAsync(A_d, memsetval, bufSize_, stream));
-    }
-    else if (type == hipMemsetTypeD8 && !async){
-      HIPCHECK(hipMemsetD8((hipDeviceptr_t)A_d, memsetval, bufSize_));
-    }
-    else if (type == hipMemsetTypeD8 && async) {
-      HIPCHECK(hipMemsetD8Async((hipDeviceptr_t)A_d, memsetval, bufSize_, stream));
-    }
-    else if (type == hipMemsetTypeD16 && !async) {
-      HIPCHECK(hipMemsetD16((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
-    }
-    else if (type == hipMemsetTypeD16 && async) {
-      HIPCHECK(hipMemsetD16Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
-    }
-    else if (type == hipMemsetTypeD32 && !async) {
-      HIPCHECK(hipMemsetD32((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
-    }
-    else if (type == hipMemsetTypeD32 && async) {
-      HIPCHECK(hipMemsetD32Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
-    }
-  }
-  if (async) {
-    HIPCHECK(hipStreamSynchronize(stream));
-  } else {
-    HIPCHECK(hipDeviceSynchronize());
-  }
-
-  auto end = chrono::high_resolution_clock::now();
-
-  HIPCHECK(hipMemcpy(A_h, A_d, bufSize_, hipMemcpyDeviceToHost) );
-
-  for (int i = 0; i < bufSize_ / sizeof(T); i++) {
-    if (A_h[i] != memsetval) {
-      cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
-           << ", memsetval: " << static_cast<int> (memsetval) << endl;
-      break;
-    }
-  }
-
-  HIPCHECK(hipFree(A_d));
-  free(A_h);
-
-  auto diff = std::chrono::duration<double>(end - start);
-  auto sec = diff.count();
-
-  auto perf = static_cast<double>((bufSize_ * NUM_ITER * (double)(1e-09)) / sec);
-
-  cout <<  "[" << setw(2) << test << "] " << setw(5) << bufSize_/1024 << " Kb " << setw(4)
-       << " typeSize " << (int)sizeof(T) << " : " << setw(7) << perf << " GB/s " << endl;
-}
-
-template<typename T>
-void hipPerfMemset::run2D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
-
-  bufSize_ = sizeList[test % num_sizes_];
-
-  size_t numH = bufSize_;
-  size_t numW = bufSize_;
-  size_t pitch_A;
-  size_t width = numW * sizeof(char);
-  size_t sizeElements = width * numH;
-  size_t elements = numW* numH;
-
-  T * A_h;
-  T * A_d;
-
-  HIPCHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width ,
-                          numH));
-  A_h = reinterpret_cast<char*>(malloc(sizeElements));
-
-  for (size_t i=0; i < elements; i++) {
-    A_h[i] = 1;
-  }
-
-  hipStream_t stream;
-  HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
-
-  // Warm-up
-  if (async) {
-    HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
-    HIPCHECK(hipStreamSynchronize(stream));
-  } else {
-    HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
-    HIPCHECK(hipDeviceSynchronize());
-  }
-
-  auto start = chrono::steady_clock::now();
-
-  for (uint i = 0; i < NUM_ITER; i++) {
-    if (type == hipMemsetTypeDefault && !async) {
-    HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
-    }
-    else if (type == hipMemsetTypeDefault && async) {
-      HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
-    }
-  }
-
-  if (async) {
-    HIPCHECK(hipStreamSynchronize(stream));
-  } else {
-    HIPCHECK(hipDeviceSynchronize());
-  }
-
-  auto end = chrono::steady_clock::now();
-
-  HIPCHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH,
-                       hipMemcpyDeviceToHost));
-
-  for (int i=0; i < elements; i++) {
-    if (A_h[i] != memsetval) {
-      cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
-           << ", memsetval: " << static_cast<int> (memsetval) << endl;
-      break;
-    }
-  }
-
-  chrono::duration<double> diff = end - start;
-
-  auto sec = diff.count();
-
-  auto perf = static_cast<double>((sizeElements* NUM_ITER * (double)(1e-09)) / sec);
-
-  cout << " hipPerf2DMemset" << (async ? "Async" : "     ") << "[" << test << "] "
-       << "  " << "(GB/s) for " << setw(5) << bufSize_
-       << " x " << setw(5) << bufSize_ << " bytes : " << setw(7) << perf <<  endl;
-
-  HIPCHECK(hipStreamDestroy(stream));
-  HIPCHECK(hipFree(A_d));
-  free(A_h);
-}
-
-template<typename T>
-void hipPerfMemset::run3D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
-
-    bufSize_ = sizeList[test % num_sizes_];
-
-    size_t numH = bufSize_;
-    size_t numW = bufSize_;
-    size_t depth = 10;
-    size_t width = numW * sizeof(char);
-    size_t sizeElements = width * numH * depth;
-    size_t elements = numW* numH* depth;
-
-    hipStream_t stream;
-    HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
-
-    T *A_h;
-
-    hipExtent extent = make_hipExtent(width, numH, depth);
-    hipPitchedPtr devPitchedPtr;
-
-    HIPCHECK(hipMalloc3D(&devPitchedPtr, extent));
-    A_h = (char*)malloc(sizeElements);
-    HIPASSERT(A_h != NULL);
-
-    for (size_t i=0; i<elements; i++) {
-        A_h[i] = 1;
-    }
-
-  // Warm-up
-  if (async) {
-    HIPCHECK(hipMemset3DAsync( devPitchedPtr, memsetval, extent, stream));
-    HIPCHECK(hipStreamSynchronize(stream));
-  } else {
-    HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
-    HIPCHECK(hipDeviceSynchronize());
-  }
-   auto start = chrono::steady_clock::now();
-
-   for (uint i = 0; i < NUM_ITER; i++) {
-     if (type == hipMemsetTypeDefault && !async) {
-       HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
-     }
-     else if (type == hipMemsetTypeDefault && async) {
-       HIPCHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, stream));
-     }
-   }
-
-  if (async) {
-    HIPCHECK(hipStreamSynchronize(stream));
-  } else {
-    HIPCHECK(hipDeviceSynchronize());
-  }
-
-  auto end = chrono::steady_clock::now();
-
-  hipMemcpy3DParms myparms = {0};
-  myparms.srcPos = make_hipPos(0,0,0);
-  myparms.dstPos = make_hipPos(0,0,0);
-  myparms.dstPtr = make_hipPitchedPtr(A_h, width , numW, numH);
-  myparms.srcPtr = devPitchedPtr;
-  myparms.extent = extent;
-
-  myparms.kind = hipMemcpyDeviceToHost;
-
-  HIPCHECK(hipMemcpy3D(&myparms));
-
-  for (int i=0; i<elements; i++) {
-    if (A_h[i] != memsetval) {
-      cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
-           << ", memsetval: " << static_cast<int> (memsetval) << endl;
-      break;
-      }
-  }
-
-  chrono::duration<double> diff = end - start;
-
-  auto sec = diff.count();
-
-  auto perf = static_cast<double>((sizeElements * NUM_ITER * (double)(1e-09)) / sec);
-
-  cout << " hipPerf3DMemset" << (async ? "Async" : "     ") << "[" << test << "] " << "  "
-       <<  "(GB/s) for " << setw(5) << bufSize_ << " x " << setw(5)
-       << bufSize_  << " x " << depth << " bytes : " << setw(7) << perf <<  endl;
-  HIPCHECK(hipFree(devPitchedPtr.ptr));
-  free(A_h);
-}
-
-int main() {
-  hipPerfMemset hipPerfMemset;
-
-  dataType pattern;
-  int deviceId = 0;
-  hipPerfMemset.open(deviceId);
-  MemsetType type;
-
-  int numTests = hipPerfMemset.getNumTests();
-  int numTests2D = hipPerfMemset.getNumTests2D();
-  int numTests3D = hipPerfMemset.getNumTests3D();
-
-
-  cout << "--------------------- 1D buffer -------------------" << endl;
-  bool async= false;
-  for (uint i = 0; i < 2 ; i++) {
-    cout << endl;
-
-    for (auto testCase = 0; testCase < numTests; testCase++) {
-      if (testCase < sizeof(eleNumList) / sizeof(uint32_t)) {
-        cout << "API: hipMemsetD8" << (async ? "Async " : "      ");
-        hipPerfMemset.run1D(testCase, pattern.memsetval, hipMemsetTypeD8, async);
-      }
-
-      else if (testCase < 2 * sizeof(eleNumList) / sizeof(uint32_t)) {
-        cout << "API: hipMemsetD16" << (async ? "Async" : "     ");
-        hipPerfMemset.run1D(testCase,pattern.memsetD16val, hipMemsetTypeD16, async);
-      }
-
-      else if (testCase < 3 * sizeof(eleNumList) / sizeof(uint32_t)) {
-        cout << "API: hipMemsetD32" << (async ? "Async" : "     ");
-        hipPerfMemset.run1D(testCase,pattern.memsetD32val, hipMemsetTypeD32, async);
-      }
-
-      else {
-        cout << "API: hipMemset" << (async ? "Async   " : "        ");
-        hipPerfMemset.run1D(testCase,pattern.memsetval, hipMemsetTypeDefault, async);
-      }
-    }
-    async = true;
-  }
-
-  cout << endl;
-  cout << "------------------ 2D buffer arrays ---------------" << endl;
-
-  async = false;
-  for (uint i = 0; i < 2; i++) {
-    cout << endl;
-    for (uint test = 0; test < numTests2D; test++) {
-      hipPerfMemset.run2D(test, pattern.memsetval, hipMemsetTypeDefault, async);
-    }
-    async = true;
-  }
-
-  cout << endl;
-  cout << "------------------ 3D buffer arrays ---------------" << endl;
-
-  async = false;
-  for (uint i = 0; i < 2; i++) {
-    cout << endl;
-    for (uint test =0; test < numTests3D; test++) {
-      hipPerfMemset.run3D(test, pattern.memsetval, hipMemsetTypeDefault, async);
-    }
-    async = true;
-  }
-
-  passed();
-}
+/*
+ Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc. All rights reserved.
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+ */
+
+/* HIT_START
+ * BUILD: %t %s ../../src/test_common.cpp
+ * TEST: %t
+ * HIT_END
+ */
+
+#include "test_common.h"
+#include <iostream>
+#include <chrono>
+
+static unsigned int sizeList[] = {
+  256, 512, 1024, 2048, 4096, 8192,
+};
+
+static unsigned int eleNumList[] = {
+    0x100, 0x400, 0x1000, 0x4000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000,
+    0x200000, 0x400000, 0x800000, 0x1000000
+};
+
+typedef struct _dataType {
+char memsetval = 0x42;
+char memsetD8val = 0xDE;
+int16_t memsetD16val = 0xDEAD;
+int memsetD32val = 0xDEADBEEF;
+}dataType;
+
+#define NUM_ITER 1000
+
+enum MemsetType {
+  hipMemsetTypeDefault,
+  hipMemsetTypeD8,
+  hipMemsetTypeD16,
+  hipMemsetTypeD32,
+  hipMemsetTypeMax
+
+};
+
+using namespace std;
+
+class hipPerfMemset {
+  private:
+    uint64_t     bufSize_;
+    unsigned int num_elements_;
+    unsigned int testNumEle_;
+    unsigned int _numSubTests = 0;
+    unsigned int _numSubTests2D = 0;
+    unsigned int _numSubTests3D = 0;
+    unsigned int num_sizes_ =0;
+
+  public:
+    hipPerfMemset() {
+    num_elements_ = sizeof(eleNumList) / sizeof(unsigned int);
+    _numSubTests = num_elements_ * hipMemsetTypeMax;
+
+    num_sizes_ = sizeof(sizeList) / sizeof(unsigned int);
+    _numSubTests2D = num_sizes_;
+    _numSubTests3D = _numSubTests2D;
+    };
+
+    ~hipPerfMemset() {};
+
+    void open(int deviceID);
+
+    template<typename T>
+    void run1D(unsigned int test, T memsetval, enum MemsetType type, bool async);
+
+    template<typename T>
+    void run2D(unsigned int test, T memsetval, enum MemsetType type, bool async);
+
+    template<typename T>
+    void run3D(unsigned int test, T memsetval, enum MemsetType type, bool async);
+
+    uint getNumTests() {
+      return _numSubTests;
+    }
+
+    uint getNumTests2D() {
+      return _numSubTests2D;
+    }
+    uint getNumTests3D() {
+      return _numSubTests3D;
+    }
+};
+
+
+void hipPerfMemset::open(int deviceId) {
+  int nGpu = 0;
+  HIPCHECK(hipGetDeviceCount(&nGpu));
+  if (nGpu < 1) {
+    failed("No GPU!");
+  }
+
+  HIPCHECK(hipSetDevice(deviceId));
+  hipDeviceProp_t props = {0};
+  HIPCHECK(hipGetDeviceProperties(&props, deviceId));
+  std::cout << "info: running on bus " << "0x" << props.pciBusID << " " << props.name
+            << " with " << props.multiProcessorCount << " CUs" << " and device id: " << deviceId
+            << std::endl;
+}
+
+template<typename T>
+void hipPerfMemset::run1D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
+
+  T * A_h;
+  T * A_d;
+
+  testNumEle_ = eleNumList[test % num_elements_];
+
+  bufSize_ = testNumEle_ * sizeof(uint32_t);
+
+  HIPCHECK(hipMalloc(&A_d, bufSize_));
+
+  A_h = reinterpret_cast<T*> (malloc(bufSize_));
+
+  hipStream_t stream;
+  HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
+
+  // Warm-up
+  if (async) {
+    HIPCHECK(hipMemsetAsync((void *)A_d, memsetval, bufSize_, stream));
+    HIPCHECK(hipStreamSynchronize(stream));
+  } else {
+    HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
+    HIPCHECK(hipDeviceSynchronize());
+  }
+
+  auto start = chrono::high_resolution_clock::now();
+  for (uint i = 0; i < NUM_ITER; i++) {
+    if (type == hipMemsetTypeDefault && !async) {
+      HIPCHECK(hipMemset((void *)A_d, memsetval, bufSize_));
+    }
+    else if (type == hipMemsetTypeDefault && async) {
+      HIPCHECK(hipMemsetAsync(A_d, memsetval, bufSize_, stream));
+    }
+    else if (type == hipMemsetTypeD8 && !async){
+      HIPCHECK(hipMemsetD8((hipDeviceptr_t)A_d, memsetval, bufSize_));
+    }
+    else if (type == hipMemsetTypeD8 && async) {
+      HIPCHECK(hipMemsetD8Async((hipDeviceptr_t)A_d, memsetval, bufSize_, stream));
+    }
+    else if (type == hipMemsetTypeD16 && !async) {
+      HIPCHECK(hipMemsetD16((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
+    }
+    else if (type == hipMemsetTypeD16 && async) {
+      HIPCHECK(hipMemsetD16Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
+    }
+    else if (type == hipMemsetTypeD32 && !async) {
+      HIPCHECK(hipMemsetD32((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T)));
+    }
+    else if (type == hipMemsetTypeD32 && async) {
+      HIPCHECK(hipMemsetD32Async((hipDeviceptr_t)A_d, memsetval, bufSize_/sizeof(T), stream));
+    }
+  }
+  if (async) {
+    HIPCHECK(hipStreamSynchronize(stream));
+  } else {
+    HIPCHECK(hipDeviceSynchronize());
+  }
+
+  auto end = chrono::high_resolution_clock::now();
+
+  HIPCHECK(hipMemcpy(A_h, A_d, bufSize_, hipMemcpyDeviceToHost) );
+
+  for (int i = 0; i < bufSize_ / sizeof(T); i++) {
+    if (A_h[i] != memsetval) {
+      cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
+           << ", memsetval: " << static_cast<int> (memsetval) << endl;
+      break;
+    }
+  }
+
+  HIPCHECK(hipFree(A_d));
+  free(A_h);
+
+  auto diff = std::chrono::duration<double>(end - start);
+  auto sec = diff.count();
+
+  auto perf = static_cast<double>((bufSize_ * NUM_ITER * (double)(1e-09)) / sec);
+
+  cout <<  "[" << setw(2) << test << "] " << setw(5) << bufSize_/1024 << " Kb " << setw(4)
+       << " typeSize " << (int)sizeof(T) << " : " << setw(7) << perf << " GB/s " << endl;
+}
+
+template<typename T>
+void hipPerfMemset::run2D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
+
+  bufSize_ = sizeList[test % num_sizes_];
+
+  size_t numH = bufSize_;
+  size_t numW = bufSize_;
+  size_t pitch_A;
+  size_t width = numW * sizeof(char);
+  size_t sizeElements = width * numH;
+  size_t elements = numW* numH;
+
+  T * A_h;
+  T * A_d;
+
+  HIPCHECK(hipMallocPitch(reinterpret_cast<void**>(&A_d), &pitch_A, width ,
+                          numH));
+  A_h = reinterpret_cast<char*>(malloc(sizeElements));
+
+  for (size_t i=0; i < elements; i++) {
+    A_h[i] = 1;
+  }
+
+  hipStream_t stream;
+  HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
+
+  // Warm-up
+  if (async) {
+    HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
+    HIPCHECK(hipStreamSynchronize(stream));
+  } else {
+    HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
+    HIPCHECK(hipDeviceSynchronize());
+  }
+
+  auto start = chrono::steady_clock::now();
+
+  for (uint i = 0; i < NUM_ITER; i++) {
+    if (type == hipMemsetTypeDefault && !async) {
+    HIPCHECK(hipMemset2D(A_d, pitch_A, memsetval, numW, numH));
+    }
+    else if (type == hipMemsetTypeDefault && async) {
+      HIPCHECK(hipMemset2DAsync(A_d, pitch_A, memsetval, numW, numH, stream));
+    }
+  }
+
+  if (async) {
+    HIPCHECK(hipStreamSynchronize(stream));
+  } else {
+    HIPCHECK(hipDeviceSynchronize());
+  }
+
+  auto end = chrono::steady_clock::now();
+
+  HIPCHECK(hipMemcpy2D(A_h, width, A_d, pitch_A, numW, numH,
+                       hipMemcpyDeviceToHost));
+
+  for (int i=0; i < elements; i++) {
+    if (A_h[i] != memsetval) {
+      cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
+           << ", memsetval: " << static_cast<int> (memsetval) << endl;
+      break;
+    }
+  }
+
+  chrono::duration<double> diff = end - start;
+
+  auto sec = diff.count();
+
+  auto perf = static_cast<double>((sizeElements* NUM_ITER * (double)(1e-09)) / sec);
+
+  cout << " hipPerf2DMemset" << (async ? "Async" : "     ") << "[" << test << "] "
+       << "  " << "(GB/s) for " << setw(5) << bufSize_
+       << " x " << setw(5) << bufSize_ << " bytes : " << setw(7) << perf <<  endl;
+
+  HIPCHECK(hipStreamDestroy(stream));
+  HIPCHECK(hipFree(A_d));
+  free(A_h);
+}
+
+template<typename T>
+void hipPerfMemset::run3D(unsigned int test, T memsetval, enum MemsetType type, bool async) {
+
+    bufSize_ = sizeList[test % num_sizes_];
+
+    size_t numH = bufSize_;
+    size_t numW = bufSize_;
+    size_t depth = 10;
+    size_t width = numW * sizeof(char);
+    size_t sizeElements = width * numH * depth;
+    size_t elements = numW* numH* depth;
+
+    hipStream_t stream;
+    HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
+
+    T *A_h;
+
+    hipExtent extent = make_hipExtent(width, numH, depth);
+    hipPitchedPtr devPitchedPtr;
+
+    HIPCHECK(hipMalloc3D(&devPitchedPtr, extent));
+    A_h = (char*)malloc(sizeElements);
+    HIPASSERT(A_h != NULL);
+
+    for (size_t i=0; i<elements; i++) {
+        A_h[i] = 1;
+    }
+
+  // Warm-up
+  if (async) {
+    HIPCHECK(hipMemset3DAsync( devPitchedPtr, memsetval, extent, stream));
+    HIPCHECK(hipStreamSynchronize(stream));
+  } else {
+    HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
+    HIPCHECK(hipDeviceSynchronize());
+  }
+   auto start = chrono::steady_clock::now();
+
+   for (uint i = 0; i < NUM_ITER; i++) {
+     if (type == hipMemsetTypeDefault && !async) {
+       HIPCHECK(hipMemset3D( devPitchedPtr, memsetval, extent));
+     }
+     else if (type == hipMemsetTypeDefault && async) {
+       HIPCHECK(hipMemset3DAsync(devPitchedPtr, memsetval, extent, stream));
+     }
+   }
+
+  if (async) {
+    HIPCHECK(hipStreamSynchronize(stream));
+  } else {
+    HIPCHECK(hipDeviceSynchronize());
+  }
+
+  auto end = chrono::steady_clock::now();
+
+  hipMemcpy3DParms myparms = {0};
+  myparms.srcPos = make_hipPos(0,0,0);
+  myparms.dstPos = make_hipPos(0,0,0);
+  myparms.dstPtr = make_hipPitchedPtr(A_h, width , numW, numH);
+  myparms.srcPtr = devPitchedPtr;
+  myparms.extent = extent;
+
+  myparms.kind = hipMemcpyDeviceToHost;
+
+  HIPCHECK(hipMemcpy3D(&myparms));
+
+  for (int i=0; i<elements; i++) {
+    if (A_h[i] != memsetval) {
+      cout << "mismatch at index " << i << " computed: " << static_cast<int> (A_h[i])
+           << ", memsetval: " << static_cast<int> (memsetval) << endl;
+      break;
+      }
+  }
+
+  chrono::duration<double> diff = end - start;
+
+  auto sec = diff.count();
+
+  auto perf = static_cast<double>((sizeElements * NUM_ITER * (double)(1e-09)) / sec);
+
+  cout << " hipPerf3DMemset" << (async ? "Async" : "     ") << "[" << test << "] " << "  "
+       <<  "(GB/s) for " << setw(5) << bufSize_ << " x " << setw(5)
+       << bufSize_  << " x " << depth << " bytes : " << setw(7) << perf <<  endl;
+  HIPCHECK(hipFree(devPitchedPtr.ptr));
+  free(A_h);
+}
+
+int main() {
+  hipPerfMemset hipPerfMemset;
+
+  dataType pattern;
+  int deviceId = 0;
+  hipPerfMemset.open(deviceId);
+  MemsetType type;
+
+  int numTests = hipPerfMemset.getNumTests();
+  int numTests2D = hipPerfMemset.getNumTests2D();
+  int numTests3D = hipPerfMemset.getNumTests3D();
+
+
+  cout << "--------------------- 1D buffer -------------------" << endl;
+  bool async= false;
+  for (uint i = 0; i < 2 ; i++) {
+    cout << endl;
+
+    for (auto testCase = 0; testCase < numTests; testCase++) {
+      if (testCase < sizeof(eleNumList) / sizeof(uint32_t)) {
+        cout << "API: hipMemsetD8" << (async ? "Async " : "      ");
+        hipPerfMemset.run1D(testCase, pattern.memsetval, hipMemsetTypeD8, async);
+      }
+
+      else if (testCase < 2 * sizeof(eleNumList) / sizeof(uint32_t)) {
+        cout << "API: hipMemsetD16" << (async ? "Async" : "     ");
+        hipPerfMemset.run1D(testCase,pattern.memsetD16val, hipMemsetTypeD16, async);
+      }
+
+      else if (testCase < 3 * sizeof(eleNumList) / sizeof(uint32_t)) {
+        cout << "API: hipMemsetD32" << (async ? "Async" : "     ");
+        hipPerfMemset.run1D(testCase,pattern.memsetD32val, hipMemsetTypeD32, async);
+      }
+
+      else {
+        cout << "API: hipMemset" << (async ? "Async   " : "        ");
+        hipPerfMemset.run1D(testCase,pattern.memsetval, hipMemsetTypeDefault, async);
+      }
+    }
+    async = true;
+  }
+
+  cout << endl;
+  cout << "------------------ 2D buffer arrays ---------------" << endl;
+
+  async = false;
+  for (uint i = 0; i < 2; i++) {
+    cout << endl;
+    for (uint test = 0; test < numTests2D; test++) {
+      hipPerfMemset.run2D(test, pattern.memsetval, hipMemsetTypeDefault, async);
+    }
+    async = true;
+  }
+
+  cout << endl;
+  cout << "------------------ 3D buffer arrays ---------------" << endl;
+
+  async = false;
+  for (uint i = 0; i < 2; i++) {
+    cout << endl;
+    for (uint test =0; test < numTests3D; test++) {
+      hipPerfMemset.run3D(test, pattern.memsetval, hipMemsetTypeDefault, async);
+    }
+    async = true;
+  }
+
+  passed();
+}
diff --git a/projects/hip-tests/samples/README.md b/projects/hip-tests/samples/README.md
index 709eee3e99..dcf178c883 100644
--- a/projects/hip-tests/samples/README.md
+++ b/projects/hip-tests/samples/README.md
@@ -41,4 +41,4 @@ cmake ../samples
 
 make package_samples
 
-## Note: sample 2_Cookbook/22_cmake_hip_lang is current not included in toplevel cmake. To build this sample from toplevel cmake, uncomment Line 43 inside samples/2_Cookbook/CMakeLists.txt. 
+## Note: sample 2_Cookbook/22_cmake_hip_lang is current not included in toplevel cmake. To build this sample from toplevel cmake, uncomment Line 43 inside samples/2_Cookbook/CMakeLists.txt.