diff --git a/projects/clr/hipamd/src/hip_device.cpp b/projects/clr/hipamd/src/hip_device.cpp
index b82b787167..03b7d7938b 100644
--- a/projects/clr/hipamd/src/hip_device.cpp
+++ b/projects/clr/hipamd/src/hip_device.cpp
@@ -32,7 +32,7 @@ namespace hip {
 
 // ================================================================================================
 hip::Stream* Device::NullStream(bool wait) {
-  ClPrint(amd::LOG_DEBUG, amd::LOG_WAIT, "NullStream %p, wait %d", null_stream_, wait);
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_WAIT, "NullStream %p, wait %d", null_stream_, wait);
   if (null_stream_ == nullptr) {
     amd::ScopedLock lock(lock_);
     if (null_stream_ == nullptr) {
@@ -188,7 +188,7 @@ void Device::WaitActiveStreams(hip::Stream* blocking_stream, bool wait_null_stre
 
   if (wait_null_stream) {
     if (null_stream_) {
-      ClPrint(amd::LOG_DEBUG, amd::LOG_WAIT, "Waiting on nullstream %p", null_stream_);
+      ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_WAIT, "Waiting on nullstream %p", null_stream_);
       waitForStream(null_stream_);
     }
   } else {
@@ -199,7 +199,8 @@ void Device::WaitActiveStreams(hip::Stream* blocking_stream, bool wait_null_stre
           ((active_stream->Flags() & hipStreamNonBlocking) == 0) &&
           // and it's not the current stream
           (active_stream != blocking_stream)) {
-        ClPrint(amd::LOG_DEBUG, amd::LOG_WAIT, "Waiting on active stream %p", active_stream);
+        ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_WAIT, "Waiting on active stream %p",
+                active_stream);
         // Get the last valid command
         waitForStream(active_stream);
       }
diff --git a/projects/clr/hipamd/src/hip_event.cpp b/projects/clr/hipamd/src/hip_event.cpp
index f4f8c10416..8b533712c8 100644
--- a/projects/clr/hipamd/src/hip_event.cpp
+++ b/projects/clr/hipamd/src/hip_event.cpp
@@ -399,7 +399,7 @@ hipError_t hipEventRecord_common(hipEvent_t event, hipStream_t stream, unsigned
   e->SetCaptureStream(stream);
   if ((stream != nullptr && stream != hipStreamLegacy) &&
       (s->GetCaptureStatus() == hipStreamCaptureStatusActive)) {
-    ClPrint(amd::LOG_INFO, amd::LOG_API,
+    ClPrint(amd::LOG_INFO, amd::LOG_CODE,
             "[hipGraph] Current capture node EventRecord on stream : %p, Event %p", stream, event);
     s->SetCaptureEvent(event);
     std::vector<hip::GraphNode*> lastCapturedNodes = s->GetLastCapturedNodes();
@@ -411,7 +411,7 @@ hipError_t hipEventRecord_common(hipEvent_t event, hipStream_t stream, unsigned
           reinterpret_cast<hip::GraphNode* const*>(s->GetLastCapturedNodes().data()),
           s->GetLastCapturedNodes().size(), false);
       if (status != hipSuccess) {
-        ClPrint(amd::LOG_ERROR, amd::LOG_API, "hipEventRecord add external event node failed");
+        ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "hipEventRecord add external event node failed");
         return status;
       }
       s->SetLastCapturedNode(node);
diff --git a/projects/clr/hipamd/src/hip_graph.cpp b/projects/clr/hipamd/src/hip_graph.cpp
index a82fc8da0a..0b3ee071ea 100644
--- a/projects/clr/hipamd/src/hip_graph.cpp
+++ b/projects/clr/hipamd/src/hip_graph.cpp
@@ -228,7 +228,7 @@ hipError_t ihipGraphAddMemsetNode(hip::GraphNode** pGraphNode, hip::Graph* graph
 
 hipError_t capturehipLaunchKernel(hipStream_t& stream, const void*& hostFunction, dim3& gridDim,
                                   dim3& blockDim, void**& args, size_t& sharedMemBytes) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API,
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
           "[hipGraph] Current capture node LaunchKernel on stream : %p", stream);
 
   if (!hip::isValid(stream)) {
@@ -316,7 +316,7 @@ hipError_t capturehipExtModuleLaunchKernel(hipStream_t& stream, hipFunction_t& f
                                            size_t& sharedMemBytes, void**& kernelParams,
                                            void**& extra, hipEvent_t& startEvent,
                                            hipEvent_t& stopEvent, uint32_t& flags) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API,
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
           "[hipGraph] Current capture node ExtModuleLaunchKernel on stream : %p", stream);
   return ihipExtLaunchKernel(stream, f, globalWorkSizeX / localWorkSizeX,
                              globalWorkSizeY / localWorkSizeY, globalWorkSizeZ / localWorkSizeZ,
@@ -329,7 +329,7 @@ hipError_t capturehipExtModuleLaunchKernel(hipStream_t& stream, hipFunction_t& f
 hipError_t capturehipExtLaunchKernel(hipStream_t& stream, const void*& hostFunction, dim3& gridDim,
                                      dim3& blockDim, void**& args, size_t& sharedMemBytes,
                                      hipEvent_t& startEvent, hipEvent_t& stopEvent, int& flags) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API,
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
           "[hipGraph] Current capture node ExtLaunchKernel on stream : %p", stream);
   return ihipExtLaunchKernel(
       stream, reinterpret_cast<hipFunction_t>(const_cast<void*>(hostFunction)), gridDim.x,
@@ -342,7 +342,7 @@ hipError_t capturehipModuleLaunchKernel(hipStream_t& stream, hipFunction_t& f, u
                                         uint32_t& blockDimY, uint32_t& blockDimZ,
                                         uint32_t& sharedMemBytes, void**& kernelParams,
                                         void**& extra) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API,
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
           "[hipGraph] Current capture node ModuleLaunchKernel on stream : %p", stream);
   if (!hip::isValid(stream)) {
     return hipErrorContextIsDestroyed;
@@ -372,7 +372,7 @@ hipError_t capturehipModuleLaunchCooperativeKernel(hipStream_t& stream, hipFunct
                                                    uint32_t& gridDimZ, uint32_t& blockDimX,
                                                    uint32_t& blockDimY, uint32_t& blockDimZ,
                                                    uint32_t& sharedMemBytes, void**& kernelParams) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API,
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
           "[hipGraph] Current capture node ModuleLaunchCooperativeKernel on stream : %p", stream);
 
   if (!hip::isValid(stream)) {
@@ -403,8 +403,8 @@ hipError_t capturehipModuleLaunchCooperativeKernel(hipStream_t& stream, hipFunct
 
 hipError_t capturehipLaunchByPtr(hipStream_t& stream, hipFunction_t func, dim3 blockDim,
                                  dim3 gridDim, unsigned int sharedMemBytes, void** extra) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node LaunchByPtr on stream : %p",
-          stream);
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
+          "[hipGraph] Current capture node LaunchByPtr on stream : %p", stream);
   if (!hip::isValid(stream)) {
     return hipErrorContextIsDestroyed;
   }
@@ -433,7 +433,7 @@ hipError_t capturehipLaunchByPtr(hipStream_t& stream, hipFunction_t func, dim3 b
 hipError_t capturehipLaunchCooperativeKernel(hipStream_t& stream, const void*& f, dim3& gridDim,
                                              dim3& blockDim, void**& kernelParams,
                                              uint32_t& sharedMemBytes) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API,
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
           "[hipGraph] Current capture node LaunchCooperativeKernel on stream : %p", stream);
   if (!hip::isValid(stream)) {
     return hipErrorContextIsDestroyed;
@@ -462,8 +462,8 @@ hipError_t capturehipLaunchCooperativeKernel(hipStream_t& stream, const void*& f
 }
 
 hipError_t capturehipMemcpy3DAsync(hipStream_t& stream, const hipMemcpy3DParms*& p) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node Memcpy3D on stream : %p",
-          stream);
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
+          "[hipGraph] Current capture node Memcpy3D on stream : %p", stream);
   if (!hip::isValid(stream)) {
     return hipErrorContextIsDestroyed;
   }
@@ -488,8 +488,8 @@ hipError_t capturehipMemcpy3DAsync(hipStream_t& stream, const hipMemcpy3DParms*&
 hipError_t capturehipMemcpy2DAsync(hipStream_t& stream, void*& dst, size_t& dpitch,
                                    const void*& src, size_t& spitch, size_t& width, size_t& height,
                                    hipMemcpyKind& kind) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node Memcpy2D on stream : %p",
-          stream);
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
+          "[hipGraph] Current capture node Memcpy2D on stream : %p", stream);
   if (dst == nullptr || src == nullptr) {
     return hipErrorInvalidValue;
   }
@@ -536,7 +536,7 @@ hipError_t capturehipMemcpy2DFromArrayAsync(hipStream_t& stream, void*& dst, siz
                                             hipArray_const_t& src, size_t& wOffsetSrc,
                                             size_t& hOffsetSrc, size_t& width, size_t& height,
                                             hipMemcpyKind& kind) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API,
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
           "[hipGraph] Current capture node Memcpy2DFromArray on stream : %p", stream);
 
   // Skip zero-sized copies
@@ -577,7 +577,7 @@ hipError_t capturehipMemcpy2DFromArrayAsync(hipStream_t& stream, void*& dst, siz
 hipError_t capturehipMemcpy2DToArrayAsync(hipStream_t& stream, hipArray_t& dst, size_t& wOffset,
                                           size_t& hOffset, const void*& src, size_t& spitch,
                                           size_t& width, size_t& height, hipMemcpyKind& kind) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API,
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
           "[hipGraph] Current capture node Memcpy2DFromArray on stream : %p", stream);
 
   // Skip zero-sized copies
@@ -616,7 +616,7 @@ hipError_t capturehipMemcpy2DToArrayAsync(hipStream_t& stream, hipArray_t& dst,
 }
 
 hipError_t capturehipMemcpyParam2DAsync(hipStream_t& stream, const hip_Memcpy2D*& pCopy) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API,
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
           "[hipGraph] Current capture node MemcpyParam2D on stream : %p", stream);
   if (!hip::isValid(stream)) {
     return hipErrorContextIsDestroyed;
@@ -691,7 +691,7 @@ hipError_t capturehipMemcpyParam2DAsync(hipStream_t& stream, const hip_Memcpy2D*
 
 hipError_t capturehipMemcpyAtoHAsync(hipStream_t& stream, void*& dstHost, hipArray_t& srcArray,
                                      size_t& srcOffset, size_t& ByteCount) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API,
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
           "[hipGraph] Current capture node MemcpyParam2D on stream : %p", stream);
   if (srcArray == nullptr || dstHost == nullptr) {
     return hipErrorInvalidValue;
@@ -720,7 +720,7 @@ hipError_t capturehipMemcpyAtoHAsync(hipStream_t& stream, void*& dstHost, hipArr
 
 hipError_t capturehipMemcpyHtoAAsync(hipStream_t& stream, hipArray_t& dstArray, size_t& dstOffset,
                                      const void*& srcHost, size_t& ByteCount) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API,
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
           "[hipGraph] Current capture node MemcpyParam2D on stream : %p", stream);
   if (dstArray == nullptr || srcHost == nullptr) {
     return hipErrorInvalidValue;
@@ -748,6 +748,8 @@ hipError_t capturehipMemcpyHtoAAsync(hipStream_t& stream, hipArray_t& dstArray,
 
 hipError_t capturehipMemcpy(hipStream_t stream, void* dst, const void* src, size_t sizeBytes,
                             hipMemcpyKind kind) {
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
+          "[hipGraph] Current capture node Memcpy on stream : %p", stream);
   if (!hip::isValid(stream)) {
     return hipErrorContextIsDestroyed;
   }
@@ -770,8 +772,8 @@ hipError_t capturehipMemcpy(hipStream_t stream, void* dst, const void* src, size
 
 hipError_t capturehipMemcpyAsync(hipStream_t& stream, void*& dst, const void*& src,
                                  size_t& sizeBytes, hipMemcpyKind& kind) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node Memcpy1D on stream : %p",
-          stream);
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
+          "[hipGraph] Current capture node Memcpy1D on stream : %p", stream);
   if (!hip::isValid(stream)) {
     return hipErrorContextIsDestroyed;
   }
@@ -779,9 +781,10 @@ hipError_t capturehipMemcpyAsync(hipStream_t& stream, void*& dst, const void*& s
 }
 
 hipError_t capturehipMemcpyHtoDAsync(hipStream_t& stream, hipDeviceptr_t& dstDevice,
-                                     const void*& srcHost, size_t& ByteCount, hipMemcpyKind& kind) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node MemcpyHtoD on stream : %p",
-          stream);
+                                     const void*& srcHost, size_t& ByteCount,
+                                     hipMemcpyKind& kind) {
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
+          "[hipGraph] Current capture node MemcpyHtoD on stream : %p", stream);
   if (!hip::isValid(stream)) {
     return hipErrorContextIsDestroyed;
   }
@@ -791,18 +794,19 @@ hipError_t capturehipMemcpyHtoDAsync(hipStream_t& stream, hipDeviceptr_t& dstDev
 hipError_t capturehipMemcpyDtoDAsync(hipStream_t& stream, hipDeviceptr_t& dstDevice,
                                      hipDeviceptr_t& srcDevice, size_t& ByteCount,
                                      hipMemcpyKind& kind) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API,
-          "[hipGraph] Current capture node hipMemcpyDtoD on stream : %p", stream);
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
+          "[hipGraph] Current capture node MemcpyDtoD on stream : %p", stream);
   if (!hip::isValid(stream)) {
     return hipErrorContextIsDestroyed;
   }
   return capturehipMemcpy(stream, dstDevice, srcDevice, ByteCount, kind);
 }
 
-hipError_t capturehipMemcpyDtoHAsync(hipStream_t& stream, void*& dstHost, hipDeviceptr_t& srcDevice,
-                                     size_t& ByteCount, hipMemcpyKind& kind) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API,
-          "[hipGraph] Current capture node hipMemcpyDtoH on stream : %p", stream);
+hipError_t capturehipMemcpyDtoHAsync(hipStream_t& stream, void*& dstHost,
+                                     hipDeviceptr_t& srcDevice, size_t& ByteCount,
+                                     hipMemcpyKind& kind) {
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
+          "[hipGraph] Current capture node MemcpyDtoH on stream : %p", stream);
   if (!hip::isValid(stream)) {
     return hipErrorContextIsDestroyed;
   }
@@ -810,8 +814,9 @@ hipError_t capturehipMemcpyDtoHAsync(hipStream_t& stream, void*& dstHost, hipDev
 }
 
 hipError_t capturehipMemcpyFromSymbolAsync(hipStream_t& stream, void*& dst, const void*& symbol,
-                                           size_t& sizeBytes, size_t& offset, hipMemcpyKind& kind) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API,
+                                           size_t& sizeBytes, size_t& offset,
+                                           hipMemcpyKind& kind) {
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
           "[hipGraph] Current capture node MemcpyFromSymbolNode on stream : %p", stream);
 
   if (kind != hipMemcpyDeviceToHost && kind != hipMemcpyDeviceToDevice &&
@@ -847,7 +852,7 @@ hipError_t capturehipMemcpyFromSymbolAsync(hipStream_t& stream, void*& dst, cons
 
 hipError_t capturehipMemcpyToSymbolAsync(hipStream_t& stream, const void*& symbol, const void*& src,
                                          size_t& sizeBytes, size_t& offset, hipMemcpyKind& kind) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API,
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
           "[hipGraph] Current capture node MemcpyToSymbolNode on stream : %p", stream);
 
   if (kind != hipMemcpyHostToDevice && kind != hipMemcpyDeviceToDevice &&
@@ -882,8 +887,8 @@ hipError_t capturehipMemcpyToSymbolAsync(hipStream_t& stream, const void*& symbo
 
 hipError_t capturehipMemsetAsync(hipStream_t& stream, void*& dst, int& value, size_t& valueSize,
                                  size_t& sizeBytes) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node Memset1D on stream : %p",
-          stream);
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
+          "[hipGraph] Current capture node Memset1D on stream : %p", stream);
   if (!hip::isValid(stream)) {
     return hipErrorContextIsDestroyed;
   }
@@ -908,8 +913,8 @@ hipError_t capturehipMemsetAsync(hipStream_t& stream, void*& dst, int& value, si
 
 hipError_t capturehipMemset2DAsync(hipStream_t& stream, void*& dst, size_t& pitch, int& value,
                                    size_t& width, size_t& height) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node Memset2D on stream : %p",
-          stream);
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
+          "[hipGraph] Current capture node Memset2D on stream : %p", stream);
   hipMemsetParams memsetParams = {0};
   if (!hip::isValid(stream)) {
     return hipErrorContextIsDestroyed;
@@ -934,8 +939,8 @@ hipError_t capturehipMemset2DAsync(hipStream_t& stream, void*& dst, size_t& pitc
 
 hipError_t capturehipMemset3DAsync(hipStream_t& stream, hipPitchedPtr& pitchedDevPtr, int& value,
                                    hipExtent& extent) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node Memset3D on stream : %p",
-          stream);
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
+          "[hipGraph] Current capture node Memset3D on stream : %p", stream);
   if (!hip::isValid(stream)) {
     return hipErrorContextIsDestroyed;
   }
@@ -966,8 +971,8 @@ hipError_t capturehipMemset3DAsync(hipStream_t& stream, hipPitchedPtr& pitchedDe
 }
 
 hipError_t capturehipLaunchHostFunc(hipStream_t& stream, hipHostFn_t& fn, void*& userData) {
-  ClPrint(amd::LOG_INFO, amd::LOG_API, "[hipGraph] Current capture node host on stream : %p",
-          stream);
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
+          "[hipGraph] Current capture node HostFunction launch on stream : %p", stream);
   if (fn == nullptr) {
     return hipErrorInvalidValue;
   }
@@ -992,6 +997,8 @@ hipError_t capturehipLaunchHostFunc(hipStream_t& stream, hipHostFn_t& fn, void*&
 // ================================================================================================
 hipError_t capturehipMallocAsync(hipStream_t stream, hipMemPool_t mem_pool, size_t size,
                                  void** dev_ptr) {
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
+          "[hipGraph] Current capture node MallocAsync on stream : %p", stream);
   auto s = reinterpret_cast<hip::Stream*>(stream);
   auto mpool = reinterpret_cast<hip::MemoryPool*>(mem_pool);
 
@@ -1029,6 +1036,8 @@ hipError_t capturehipMallocAsync(hipStream_t stream, hipMemPool_t mem_pool, size
 
 // ================================================================================================
 hipError_t capturehipFreeAsync(hipStream_t stream, void* dev_ptr) {
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
+          "[hipGraph] Current capture node FreeAsync on stream : %p", stream);
   hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
   auto mem_free_node = new hip::GraphMemFreeNode(dev_ptr);
   auto status =
diff --git a/projects/clr/hipamd/src/hip_graph_internal.cpp b/projects/clr/hipamd/src/hip_graph_internal.cpp
index 4fbd7d32b7..4a7e735b00 100644
--- a/projects/clr/hipamd/src/hip_graph_internal.cpp
+++ b/projects/clr/hipamd/src/hip_graph_internal.cpp
@@ -122,7 +122,7 @@ bool Graph::isGraphValid(Graph* pGraph) {
 // ================================================================================================
 void Graph::AddNode(const Node& node) {
   vertices_.emplace_back(node);
-  ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] Add %s(%p)",
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_CODE, "[hipGraph] Add %s(%p)",
           GetGraphNodeTypeString(node->GetType()), node);
   node->SetParentGraph(this);
 }
@@ -140,7 +140,7 @@ std::vector<Node> Graph::GetRootNodes() const {
   for (auto entry : vertices_) {
     if (entry->GetInDegree() == 0) {
       roots.push_back(entry);
-      ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] Root node: %s(%p)",
+      ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_CODE, "[hipGraph] Root node: %s(%p)",
               GetGraphNodeTypeString(entry->GetType()), entry);
     }
   }
@@ -703,6 +703,11 @@ hipError_t GraphExec::Run(hip::Stream* launch_stream) {
     repeatLaunch_ = true;
   }
 
+  ClPrint(amd::LOG_DEBUG, amd::LOG_CODE,
+          "GraphExec::Run max_streams: %d, "
+          "on device: %d, total number of nodes: %d",
+          max_streams_, launch_stream->DeviceId(), topoOrder_.size());
+
   if (max_streams_ == 1 && instantiateDeviceId_ == launch_stream->DeviceId()) {
     if (DEBUG_CLR_GRAPH_PACKET_CAPTURE) {
       // If the graph has kernels that does device side allocation,  during packet capture, heap is
diff --git a/projects/clr/hipamd/src/hip_graph_internal.hpp b/projects/clr/hipamd/src/hip_graph_internal.hpp
index 5d2b6f6a14..2b6fafe699 100644
--- a/projects/clr/hipamd/src/hip_graph_internal.hpp
+++ b/projects/clr/hipamd/src/hip_graph_internal.hpp
@@ -2327,7 +2327,7 @@ class GraphMemAllocNode final : public GraphNode {
                                      amd::Device::VmmAccess::kReadWrite);
       va_->retain();
       graph_->IncrementMemAllocNodeCount();  // Increment count of unreleased mem alloc nodes
-      ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Graph MemAlloc execute [%p-%p], %p",
+      ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM_POOL, "Graph MemAlloc execute [%p-%p], %p",
               vaddr_sub_obj->getSvmPtr(),
               reinterpret_cast<char*>(vaddr_sub_obj->getSvmPtr()) + aligned_size, memory());
     }
@@ -2390,7 +2390,7 @@ class GraphMemAllocNode final : public GraphNode {
           // be executed again
           amd::MemObjMap::AddMemObj(node_params_.dptr, va_);
         }
-        ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Graph MemAlloc create: %p", node_params_.dptr);
+        ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM_POOL, "Graph MemAlloc create: %p", node_params_.dptr);
       }
     }
     return error;
@@ -2405,7 +2405,7 @@ class GraphMemAllocNode final : public GraphNode {
         va_ = amd::MemObjMap::FindVirtualMemObj(node_params_.dptr);
         amd::MemObjMap::AddMemObj(node_params_.dptr, va_);
       }
-      ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Graph MemAlloc reserve VA: %p", node_params_.dptr);
+      ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM_POOL, "Graph MemAlloc reserve VA: %p", node_params_.dptr);
     }
     return node_params_.dptr;
   }
@@ -2470,7 +2470,7 @@ class GraphMemFreeNode : public GraphNode {
       }
       amd::MemObjMap::AddMemObj(ptr(), vaddr_mem_obj);
       graph_->DecrementMemAllocNodeCount();  // Decrement count of unreleased memalloc nodes
-      ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Graph MemFree execute: %p, %p", ptr(),
+      ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM_POOL, "Graph MemFree execute: %p, %p", ptr(),
               vaddr_sub_obj);
     }
 
@@ -2500,7 +2500,7 @@ class GraphMemFreeNode : public GraphNode {
             graph, stream->DeviceId(), *stream, amd::Command::EventWaitList{}, device_ptr_,
             amd::alignUp(va->getSize(), dev_info.virtualMemAllocGranularity_), nullptr);
         commands_.push_back(cmd);
-        ClPrint(amd::LOG_INFO, amd::LOG_MEM_POOL, "Graph FreeMem create: %p", device_ptr_);
+        ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM_POOL, "Graph FreeMem create: %p", device_ptr_);
       }
     }
     return error;
diff --git a/projects/clr/hipamd/src/hip_stream.cpp b/projects/clr/hipamd/src/hip_stream.cpp
index 5b825d0d4f..37c19bc7fc 100644
--- a/projects/clr/hipamd/src/hip_stream.cpp
+++ b/projects/clr/hipamd/src/hip_stream.cpp
@@ -476,7 +476,7 @@ hipError_t hipStreamWaitEvent_common(hipStream_t stream, hipEvent_t event, unsig
 
   hip::Stream* eventStream = reinterpret_cast<hip::Stream*>(eventStreamHandle);
   if (eventStream != nullptr && eventStream->IsEventCaptured(event) == true) {
-    ClPrint(amd::LOG_INFO, amd::LOG_API,
+    ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_API,
             "[hipGraph] Current capture node StreamWaitEvent on stream : %p, Event %p", stream,
             event);
     if (waitStream == nullptr) {
diff --git a/projects/clr/rocclr/device/blit.cpp b/projects/clr/rocclr/device/blit.cpp
index 8873cf2e4f..d58ae48395 100644
--- a/projects/clr/rocclr/device/blit.cpp
+++ b/projects/clr/rocclr/device/blit.cpp
@@ -39,7 +39,7 @@ bool HostBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
     LogError("Couldn't map device memory for host read");
     return false;
   }
-  ClPrint(amd::LOG_INFO, amd::LOG_COPY, "Using host memcpy D2H, src=%p, dst=%p, size=%zu",
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "Using host memcpy D2H, src=%p, dst=%p, size=%zu",
           (reinterpret_cast<const_address>(src) + origin[0]), dstHost, size[0]);
   // Copy memory
   std::memcpy(dstHost, reinterpret_cast<const_address>(src) + origin[0], size[0]);
@@ -163,7 +163,7 @@ bool HostBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory
     return false;
   }
 
-  ClPrint(amd::LOG_INFO, amd::LOG_COPY, "Using host memcpy H2D, src=%p, dst=%p, size=%zu",
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "Using host memcpy H2D, src=%p, dst=%p, size=%zu",
         srcHost, (reinterpret_cast<address>(dst) + origin[0]), size[0]);
   // Copy memory
   std::memcpy(reinterpret_cast<address>(dst) + origin[0], srcHost, size[0]);
@@ -294,7 +294,8 @@ bool HostBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstM
     LogError("Couldn't map destination memory");
     return false;
   }
-  ClPrint(amd::LOG_INFO, amd::LOG_COPY, "Using host memcpy for copyBuffer, src=%p, dst=%p, size=%zu",
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY,
+          "Using host memcpy for copyBuffer, src=%p, dst=%p, size=%zu",
           (reinterpret_cast<const_address>(src) + srcOrigin[0]),
           (reinterpret_cast<address>(dst) + dstOrigin[0]), size[0]);
   // Straight forward buffer copy
@@ -328,8 +329,8 @@ bool HostBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory&
     return false;
   }
 
-  ClPrint(amd::LOG_INFO, amd::LOG_COPY, "Using host memcpy for copyBufferRect, src=%p, "
-          "dst=%p, size=%zu",
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY,
+          "Using host memcpy for copyBufferRect, src=%p, dst=%p, size=%zu",
           (reinterpret_cast<const_address>(src) + srcRect.offset(0, 0, 0)),
           (reinterpret_cast<address>(dst) + dstRect.offset(0, 0, 0)), size[0]);
 
diff --git a/projects/clr/rocclr/device/devprogram.cpp b/projects/clr/rocclr/device/devprogram.cpp
index a33b09b9c6..76d487bf55 100644
--- a/projects/clr/rocclr/device/devprogram.cpp
+++ b/projects/clr/rocclr/device/devprogram.cpp
@@ -1049,7 +1049,7 @@ static void dumpCodeObject(const std::string& image) {
   char fname[30];
   static std::atomic<int> index;
   sprintf(fname, "_code_object%04d.o", index++);
-  ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Code object saved in %s\n", fname);
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_CODE, "Code object saved in %s\n", fname);
   std::ofstream ofs;
   ofs.open(fname, std::ios::binary);
   ofs << image;
@@ -2539,7 +2539,7 @@ bool Program::createKernelMetadataMap(void* binary, size_t binSize) {
 
   status = amd::Comgr::metadata_lookup(metadata_, "Kernels", &kernelsMD);
   if (status == AMD_COMGR_STATUS_SUCCESS) {
-    ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Using Code Object V2.");
+    ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_CODE, "Using Code Object V2.");
     hasKernelMD = true;
     codeObjectVer_ = 2;
   } else {
@@ -2591,13 +2591,13 @@ bool Program::createKernelMetadataMap(void* binary, size_t binSize) {
 
     if (major_version == '1') {
       if (minor_version == '0') {
-        ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Using Code Object V3.");
+        ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_CODE, "Using Code Object V3.");
         codeObjectVer_ = 3;
       } else if (minor_version == '1') {
-        ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Using Code Object V4.");
+        ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_CODE, "Using Code Object V4.");
         codeObjectVer_ = 4;
       } else if (minor_version == '2') {
-        ClPrint(amd::LOG_INFO, amd::LOG_CODE, "Using Code Object V5.");
+        ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_CODE, "Using Code Object V5.");
         codeObjectVer_ = 5;
       } else {
         ClPrint(amd::LOG_ERROR, amd::LOG_CODE,
diff --git a/projects/clr/rocclr/device/rocm/rocblit.cpp b/projects/clr/rocclr/device/rocm/rocblit.cpp
index 1b7b627d78..842d322554 100644
--- a/projects/clr/rocclr/device/rocm/rocblit.cpp
+++ b/projects/clr/rocclr/device/rocm/rocblit.cpp
@@ -310,7 +310,7 @@ bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& d
       hsa_signal_t active = gpu().Barriers().ActiveSignal(kInitSignalValueOne, gpu().timestamp());
 
       // Copy memory line by line
-      ClPrint(amd::LOG_DEBUG, amd::LOG_COPY,
+      ClPrint(amd::LOG_DEBUG, amd::LOG_COPY2,
               "HSA Async Copy Rect dst=0x%zx, src=0x%zx, wait_event=0x%zx, "
               "completion_signal=0x%zx",
               dstMem.base, srcMem.base, (wait_events.size() != 0) ? wait_events[0].handle : 0,
@@ -335,7 +335,7 @@ bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& d
           size_t dstOffset = dstRect.offset(0, y, z);
 
           // Copy memory line by line
-          ClPrint(amd::LOG_DEBUG, amd::LOG_COPY,
+          ClPrint(amd::LOG_DEBUG, amd::LOG_COPY2,
                   "HSA Async Copy wait_event=0x%zx, completion_signal=0x%zx",
                   (wait_events.size() != 0) ? wait_events[0].handle : 0, active.handle);
           hsa_status_t status = hsa_amd_memory_async_copy(
@@ -509,7 +509,7 @@ inline bool DmaBlitManager::rocrCopyBuffer(address dst, hsa_agent_t& dstAgent, c
 
   if (!kUseRegularCopyApi && engine != HwQueueEngine::Unknown) {
     copyMask = gpu().getLastUsedSdmaEngine();
-    ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Last copy mask 0x%x", copyMask);
+    ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "Last copy mask 0x%x", copyMask);
     copyMask &= (engine == HwQueueEngine::SdmaRead ? sdmaEngineReadMask_ : sdmaEngineWriteMask_);
     if (copyMask == 0) {
       // Check SDMA engine status
@@ -539,7 +539,7 @@ inline bool DmaBlitManager::rocrCopyBuffer(address dst, hsa_agent_t& dstAgent, c
       // Copy on the first available free engine if ROCr returns a valid mask
       hsa_amd_sdma_engine_id_t copyEngine = static_cast<hsa_amd_sdma_engine_id_t>(copyMask);
 
-      ClPrint(amd::LOG_DEBUG, amd::LOG_COPY,
+      ClPrint(amd::LOG_DEBUG, amd::LOG_COPY2,
               "HSA Copy copy_engine=0x%x, dst=0x%zx, src=0x%zx, "
               "size=%ld, forceSDMA=%d, engineType=%d, wait_event=0x%zx, completion_signal=0x%zx",
               copyEngine, dst, src, size, forceSDMA, engine,
@@ -554,7 +554,7 @@ inline bool DmaBlitManager::rocrCopyBuffer(address dst, hsa_agent_t& dstAgent, c
   }
 
   if (engine == HwQueueEngine::Unknown || kUseRegularCopyApi) {
-    ClPrint(amd::LOG_DEBUG, amd::LOG_COPY,
+    ClPrint(amd::LOG_DEBUG, amd::LOG_COPY2,
             "HSA Copy dst=0x%zx, src=0x%zx, size=%ld, wait_event=0x%zx, "
             "completion_signal=0x%zx, engineType=%d",
             dst, src, size, (wait_events.size() != 0) ? wait_events[0].handle : 0, active.handle,
@@ -656,7 +656,8 @@ void DmaBlitManager::getBuffer(const_address hostMem, size_t size, bool enablePi
     if (pinnedMem != nullptr) {
       Memory* pinnedMemory = dev().getRocMemory(pinnedMem);
       address pinBuffer = pinnedMemory->getDeviceMemory();
-      ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "HSA Copy Using Pinned resource size %d", xferSize);
+      ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "HSA Copy Using Pinned resource size %d",
+              xferSize);
       buffState.copySize_ = xferSize;
       buffState.buffer_ = pinBuffer + partial1 + partial2;
       buffState.pinnedMem_ = pinnedMem;
@@ -666,7 +667,8 @@ void DmaBlitManager::getBuffer(const_address hostMem, size_t size, bool enablePi
   }
   // If Memory Pinning fails, failback to staging buffer
   xferSize = std::min(xferSize, StagingXferSize);
-  ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "HSA Copy Using Staging resource size %d", xferSize);
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "HSA Copy Using Staging resource size %d",
+          xferSize);
   buffState.copySize_ = xferSize;
   buffState.buffer_ = gpu().Staging().Acquire(std::min(xferSize, StagingXferSize));
 }
@@ -711,7 +713,7 @@ bool DmaBlitManager::hsaCopyStagedOrPinned(const_address hostSrc, address hostDs
     }
     if (hostToDev) {                          // H2D Path
       if (outBuffer.pinnedMem_ == nullptr) {  // Copy to Staging Buffer
-        ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "memcpy stg buf=%p, host src=%p, size=%zu",
+        ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "memcpy stg buf=%p, host src=%p, size=%zu",
                 stagingBuffer, hostSrc + copyOffset, copysize);
         memcpy(stagingBuffer, hostSrc + copyOffset, copysize);
       }
@@ -731,7 +733,7 @@ bool DmaBlitManager::hsaCopyStagedOrPinned(const_address hostSrc, address hostDs
         if (outBuffer.pinnedMem_ == nullptr) {
           // Wait for current signal of previous rocr copy if its not pinned mem
           gpu().Barriers().WaitCurrent();
-          ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "memcpy host dst=%p, stg buf=%p, size=%zu",
+          ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "memcpy host dst=%p, stg buf=%p, size=%zu",
                   hostDst + copyOffset, stagingBuffer, copysize);
           memcpy(hostDst + copyOffset, stagingBuffer, copysize);
         }
@@ -1754,7 +1756,7 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
         // Wait for current signal of previous blit copy if its not pinned mem
         if (outBuffer.pinnedMem_ == nullptr) {
           gpu().Barriers().WaitCurrent();
-          ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "memcpy host dst=%p, stg buf=%p, size=%zu",
+          ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "memcpy host dst=%p, stg buf=%p, size=%zu",
                   (void*)(dstAddr + stagedCopyOffset), stagingBuffer, copySize);
           memcpy(dstAddr + stagedCopyOffset, stagingBuffer, copySize);
         }
@@ -1875,7 +1877,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
         copySize = outBuffer.copySize_;
         address currentDstAddr = dstAddr + stagedCopyOffset;
         if (outBuffer.pinnedMem_ == nullptr) {
-          ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "memcpy stg buf=%p, host src=%p, size=%zu",
+          ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_COPY, "memcpy stg buf=%p, host src=%p, size=%zu",
                   stagingBuffer, (void*)(srcAddr + stagedCopyOffset), copySize);
           memcpy(stagingBuffer, srcAddr + stagedCopyOffset, copySize);
         }
diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp
index 4acf5adc12..104f40f76e 100644
--- a/projects/clr/rocclr/device/rocm/rocdevice.cpp
+++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp
@@ -234,12 +234,13 @@ Device::~Device() {
       hsa_queue_t* queue = qIter->first;
       auto& qInfo = qIter->second;
       if (qInfo.hostcallBuffer_) {
-        ClPrint(amd::LOG_INFO, amd::LOG_QUEUE, "Deleting hostcall buffer %p for hardware queue %p",
-                qInfo.hostcallBuffer_, qIter->first->base_address);
+        ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_QUEUE,
+                "Deleting hostcall buffer %p for hardware queue %p", qInfo.hostcallBuffer_,
+                qIter->first->base_address);
         amd::disableHostcalls(qInfo.hostcallBuffer_);
         context().svmFree(qInfo.hostcallBuffer_);
       }
-      ClPrint(amd::LOG_INFO, amd::LOG_QUEUE, "Deleting hardware queue %p with refCount 0",
+      ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_QUEUE, "Deleting hardware queue %p with refCount 0",
               queue->base_address);
       qIter = it.erase(qIter);
       hsa_queue_destroy(queue);
@@ -1987,11 +1988,11 @@ hsa_amd_memory_pool_t Device::getHostMemoryPool(MemorySegment mem_seg,
       break;
     case kUncachedAtomics:
       if (agentInfo->ext_fine_grain_pool.handle != 0) {
-        ClPrint(amd::LOG_DEBUG, amd::LOG_MEM,
+        ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM,
                 "Using extended fine grained access system memory pool");
         segment = agentInfo->ext_fine_grain_pool;
       } else {
-        ClPrint(amd::LOG_DEBUG, amd::LOG_MEM,
+        ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM,
                 "Falling through on fine grained access system memory pool");
         segment = agentInfo->fine_grain_pool;
       }
@@ -2065,7 +2066,7 @@ void* Device::hostNumaAlloc(size_t size, size_t alignment, MemorySegment mem_seg
     LogPrintfError("get_mempolicy failed with error %ld", res);
     return ptr;
   }
-  ClPrint(amd::LOG_INFO, amd::LOG_RESOURCE,
+  ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_RESOURCE,
           "get_mempolicy() succeed with mode %d, nodeMask 0x%lx, cpuCount %zu", mode,
           *nodeMask->maskp, cpuCount);
 
@@ -2805,7 +2806,7 @@ bool Device::IsHwEventReady(const amd::Event& event, bool wait, amd::SyncPolicy
   void* hw_event =
       (event.NotifyEvent() != nullptr) ? event.NotifyEvent()->HwEvent() : event.HwEvent();
   if (hw_event == nullptr) {
-    ClPrint(amd::LOG_INFO, amd::LOG_SIG, "No HW event");
+    ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_SIG, "No HW event");
     return false;
   } else if (wait) {
     // hipEventBlockingSync
diff --git a/projects/clr/rocclr/device/rocm/rocmemory.cpp b/projects/clr/rocclr/device/rocm/rocmemory.cpp
index 6b52211161..d337f92985 100644
--- a/projects/clr/rocclr/device/rocm/rocmemory.cpp
+++ b/projects/clr/rocclr/device/rocm/rocmemory.cpp
@@ -122,7 +122,7 @@ void* Memory::allocMapTarget(const amd::Coord3D& origin, const amd::Coord3D& reg
   if (indirectMapCount_ == 1) {
     if (!allocateMapMemory(owner()->getSize())) {
       decIndMapCount();
-      DevLogPrintfError("Cannot allocate Map memory for size: %u \n", owner()->getSize());
+      DevLogPrintfError("Cannot allocate Map memory for size: %u", owner()->getSize());
       return nullptr;
     }
   } else {
@@ -180,7 +180,7 @@ void* Memory::cpuMap(device::VirtualDevice& vDev, uint flags, uint startLayer, u
   if (!isHostMemDirectAccess() && !IsPersistentDirectMap()) {
     if (!vDev.blitMgr().readBuffer(*this, mapTarget, amd::Coord3D(0), amd::Coord3D(size()), true)) {
       decIndMapCount();
-      DevLogError("Cannot read buffer \n");
+      DevLogError("Cannot read buffer");
       return nullptr;
     }
   }
@@ -192,7 +192,7 @@ void Memory::cpuUnmap(device::VirtualDevice& vDev) {
   if (!isHostMemDirectAccess() && !IsPersistentDirectMap()) {
     if (!vDev.blitMgr().writeBuffer(mapMemory_->getHostMem(), *this, amd::Coord3D(0),
                                     amd::Coord3D(size()), true)) {
-      LogError("[OCL] Fail sync the device memory on cpuUnmap");
+      LogError("Fail sync the device memory on cpuUnmap");
     }
     // Wait on CPU for the transfer
     static_cast<roc::VirtualGPU&>(vDev).releaseGpuMemoryFence();
@@ -624,7 +624,7 @@ Buffer::~Buffer() {
       // Detach the memory from HSA
       auto hsa_status = hsa_amd_ipc_memory_detach(owner()->getSvmPtr());
       if (hsa_status != HSA_STATUS_SUCCESS) {
-        LogPrintfError("HSA failed to detach memory with status: %d \n", hsa_status);
+        LogPrintfError("HSA failed to detach memory with status: %d", hsa_status);
       }
     }
   }
@@ -666,8 +666,8 @@ void Buffer::destroy() {
           }
         } else if (memFlags & ROCCLR_MEM_HSA_SIGNAL_MEMORY) {
           if (HSA_STATUS_SUCCESS != hsa_signal_destroy(signal_)) {
-            ClPrint(amd::LOG_DEBUG, amd::LOG_MEM,
-                    "[ROCClr] ROCCLR_MEM_HSA_SIGNAL_MEMORY signal destroy failed \n");
+            ClPrint(amd::LOG_ERROR, amd::LOG_MEM,
+                    "hsa_signal_destroy failed");
           }
           deviceMemory_ = nullptr;
         } else {
@@ -684,7 +684,7 @@ void Buffer::destroy() {
         }
         // destroy system memory
         if (!(amd::Os::releaseMemory(deviceMemory_, size()))) {
-          ClPrint(amd::LOG_DEBUG, amd::LOG_MEM, "[ROCClr] munmap failed \n");
+          ClPrint(amd::LOG_ERROR, amd::LOG_MEM, "munmap failed");
         }
       }
     }
@@ -764,7 +764,7 @@ bool Buffer::create(bool alloc_local) {
             reinterpret_cast<const amd::IpcBuffer*>(owner())->Handle()),
         owner()->getSize(), ipc_agents_num, dev().IpcAgents(), &orig_dev_ptr);
     if (hsa_status != HSA_STATUS_SUCCESS) {
-      LogPrintfError("HSA failed to attach IPC memory with status: %d \n", hsa_status);
+      LogPrintfError("HSA failed to attach IPC memory with status: %d", hsa_status);
       return false;
     }
     owner()->setSvmPtr(orig_dev_ptr);
@@ -779,7 +779,7 @@ bool Buffer::create(bool alloc_local) {
       // if interprocess flag is set, then the memory is importable.
       if (!dev().ImportShareableHSAHandle(owner()->getSvmPtr(),
                                           &owner()->getUserData().hsa_handle)) {
-        LogPrintfError("Importing Shareable Memory failed with os_handle: 0x%x \n",
+        LogPrintfError("Importing Shareable Memory failed with os_handle: 0x%x",
                        owner()->getSvmPtr());
         return false;
       }
@@ -835,13 +835,13 @@ bool Buffer::create(bool alloc_local) {
           if (HSA_STATUS_SUCCESS != hsa_amd_signal_create(kInitSignalValueOne, 0, nullptr,
                                                           HSA_AMD_SIGNAL_AMD_GPU_ONLY, &signal_)) {
             ClPrint(amd::LOG_ERROR, amd::LOG_MEM,
-                    "[ROCclr] ROCCLR_MEM_HSA_SIGNAL_MEMORY signal creation failed");
+                    "hsa_amd_signal_create signal failed");
             return false;
           }
           volatile hsa_signal_value_t* signalValuePtr = nullptr;
           if (HSA_STATUS_SUCCESS != hsa_amd_signal_value_pointer(signal_, &signalValuePtr)) {
             ClPrint(amd::LOG_ERROR, amd::LOG_MEM,
-                    "[ROCclr] ROCCLR_MEM_HSA_SIGNAL_MEMORY pointer query failed");
+                    "hsa_amd_signal_value_pointer failed");
             return false;
           }
 
@@ -1043,7 +1043,7 @@ bool Buffer::ExportHandle(void* handle) const {
   auto hsa_status = hsa_amd_ipc_memory_create(orig_dev_ptr, owner()->getSize(),
                                               reinterpret_cast<hsa_amd_ipc_memory_t*>(handle));
   if (hsa_status != HSA_STATUS_SUCCESS) {
-    LogPrintfError("Failed to create memory for IPC, failed with hsa_status: %d \n", hsa_status);
+    LogPrintfError("Failed to create memory for IPC, failed with hsa_status: %d", hsa_status);
     return false;
   }
   return true;
@@ -1304,7 +1304,7 @@ bool Image::create(bool alloc_local) {
                                                     permission_, &deviceImageInfo_);
 
   if (status != HSA_STATUS_SUCCESS) {
-    LogPrintfError("[OCL] Fail to allocate image memory, failed with hsa_status: %d \n", status);
+    LogPrintfError("Fail to allocate image memory, failed with hsa_status: %d", status);
     return false;
   }
 
diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp
index 1266643e7f..a9d1d58762 100644
--- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp
+++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp
@@ -773,7 +773,7 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para
         mem = memories[index];
         const void* globalAddress = *reinterpret_cast<const void* const*>(params + desc.offset_);
         if (mem == nullptr) {
-          ClPrint(amd::LOG_INFO, amd::LOG_KERN, "Arg%d: %s %s = ptr:%p ", i, desc.typeName_.c_str(),
+          ClPrint(amd::LOG_DEBUG, amd::LOG_KERN, "Arg%d: %s %s = ptr:%p ", i, desc.typeName_.c_str(),
                   desc.name_.c_str(), globalAddress);
           //! This condition is for SVM fine-grain
           if (dev().isFineGrainedSystem(true)) {
@@ -787,7 +787,7 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para
           gpuMem = static_cast<Memory*>(mem->getDeviceMemory(dev()));
 
           const void* globalAddress = *reinterpret_cast<const void* const*>(params + desc.offset_);
-          ClPrint(amd::LOG_INFO, amd::LOG_KERN, "Arg%d: %s %s = ptr:%p obj:[%p-%p]", i,
+          ClPrint(amd::LOG_DEBUG, amd::LOG_KERN, "Arg%d: %s %s = ptr:%p obj:[%p-%p]", i,
                   desc.typeName_.c_str(), desc.name_.c_str(), globalAddress,
                   gpuMem->getDeviceMemory(),
                   reinterpret_cast<address>(gpuMem->getDeviceMemory()) + mem->getSize());
@@ -874,10 +874,10 @@ bool VirtualGPU::processMemObjects(const amd::Kernel& kernel, const_address para
           if (desc.size_ > kMaxBytes) {
             bytes += "...";
           }
-          ClPrint(amd::LOG_INFO, amd::LOG_KERN, "Arg%d: %s %s = %s (size:0x%x)", i,
+          ClPrint(amd::LOG_DEBUG, amd::LOG_KERN, "Arg%d: %s %s = %s (size:0x%x)", i,
                   desc.typeName_.c_str(), desc.name_.c_str(), bytes.c_str(), desc.size_);
         } else {
-          ClPrint(amd::LOG_INFO, amd::LOG_KERN, "Arg%d: %s %s = val:0x%lx (size:0x%x)", i,
+          ClPrint(amd::LOG_DEBUG, amd::LOG_KERN, "Arg%d: %s %s = val:0x%lx (size:0x%x)", i,
                   desc.typeName_.c_str(), desc.name_.c_str(),
                   (desc.size_ == 1)   ? *reinterpret_cast<const uint8_t*>(srcArgPtr)
                   : (desc.size_ == 2) ? *reinterpret_cast<const uint16_t*>(srcArgPtr)
@@ -1621,7 +1621,8 @@ address VirtualGPU::ManagedBuffer::Acquire(uint32_t size, uint32_t alignment) {
   } else {
     // Reset the signal for the barrier packet
     hsa_signal_silent_store_relaxed(pool_signal_[active_chunk_], kInitSignalValueOne);
-    ClPrint(amd::LOG_INFO, amd::LOG_KERN, "Issue barrier to flush chunk %d", active_chunk_);
+    ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_KERN, "Issue barrier to flush chunk %d",
+            active_chunk_);
     // Currently don't skip wait signal check, because SDMA engine cna be used in staging copy
     constexpr bool kSkipSignal = false;
     // Dispatch a barrier packet into the queue
@@ -3489,7 +3490,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
                                              gpuKernel.KernargSegmentAlignment());
       command_->SetKernelName(gpuKernel.getDemangledName().c_str());
     } else {
-      ClPrint(amd::LOG_INFO, amd::LOG_KERN,
+      ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_KERN,
               "KernargSegmentByteSize = %lu "
               "KernargSegmentAlignment = %lu",
               gpuKernel.KernargSegmentByteSize(), gpuKernel.KernargSegmentAlignment());
@@ -3544,11 +3545,10 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
     // Validate privateMemSize is more than max allowed.
     size_t maxStackSize = dev().MaxStackSize();
     if (dispatchPacket.private_segment_size > maxStackSize) {
-      ClPrint(amd::LOG_INFO, amd::LOG_KERN,
+      ClPrint(amd::LOG_ERROR, amd::LOG_KERN,
               "Scratch size (%u) exceeds max allowed (%zu) for kernel : %s",
               dispatchPacket.private_segment_size, maxStackSize,
               gpuKernel.getDemangledName().c_str());
-      LogError("Scratch size exceeds max allowed.");
       return false;
     }
   }
diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.hpp b/projects/clr/rocclr/device/rocm/rocvirtual.hpp
index 3c0ddb2b6c..f12116e425 100644
--- a/projects/clr/rocclr/device/rocm/rocvirtual.hpp
+++ b/projects/clr/rocclr/device/rocm/rocvirtual.hpp
@@ -65,7 +65,7 @@ inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false, bool yi
       if (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,
                                     kTimeout100us, HSA_WAIT_STATE_ACTIVE) != 0) {
         if (HIP_SKIP_ABORT_ON_GPU_ERROR && amd::Device::IsGPUInError()) {
-          ClPrint(amd::LOG_INFO, amd::LOG_SIG,
+          ClPrint(amd::LOG_ERROR, amd::LOG_SIG,
                   "Device not Stable, while waiting for Signal ="
                   "(0x%lx) for %d ns",
                   signal.handle, kTimeout100us);
@@ -79,7 +79,7 @@ inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false, bool yi
     while (hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,
                                      kTimeout4Secs, wait_state) != 0) {
       if (HIP_SKIP_ABORT_ON_GPU_ERROR && amd::Device::IsGPUInError()) {
-        ClPrint(amd::LOG_INFO, amd::LOG_SIG,
+        ClPrint(amd::LOG_ERROR, amd::LOG_SIG,
                 "Device not Stable, while waiting for Signal ="
                 "(0x%lx) for %d ns",
                 signal.handle, kTimeout4Secs);
diff --git a/projects/clr/rocclr/platform/command.cpp b/projects/clr/rocclr/platform/command.cpp
index c52b3e1e19..10297bac8b 100644
--- a/projects/clr/rocclr/platform/command.cpp
+++ b/projects/clr/rocclr/platform/command.cpp
@@ -160,12 +160,12 @@ bool Event::setStatus(int32_t status, uint64_t timeStamp) {
     }
 
     if (profilingInfo().enabled_) {
-      ClPrint(LOG_DEBUG, LOG_CMD, "Command %p complete (Wall: %ld, CPU: %ld, GPU: %ld us)",
+      ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Command %p complete (Wall: %ld, CPU: %ld, GPU: %ld us)",
               &command(), ((profilingInfo().end_ - epoch) / 1000),
               ((profilingInfo().submitted_ - profilingInfo().queued_) / 1000),
               ((profilingInfo().end_ - profilingInfo().start_) / 1000));
     } else {
-      ClPrint(LOG_DEBUG, LOG_CMD, "Command %p complete", &command());
+      ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Command %p complete", &command());
     }
     release();
   }
@@ -177,7 +177,7 @@ bool Event::setStatus(int32_t status, uint64_t timeStamp) {
 bool Event::resetStatus(int32_t status) {
   int32_t currentStatus = this->status();
   if (currentStatus != CL_COMPLETE) {
-    ClPrint(LOG_ERROR, LOG_CMD, "command is reset before complete current status :%d",
+    ClPrint(LOG_ERROR, LOG_CMD, "Command is reset before complete current status :%d",
             currentStatus);
   }
   if (!status_.compare_exchange_strong(currentStatus, status, std::memory_order_relaxed)) {
@@ -191,7 +191,7 @@ bool Event::resetStatus(int32_t status) {
 // ================================================================================================
 bool Event::setCallback(int32_t status, Event::CallBackFunction callback, void* data,
                         bool blocking) {
-  assert(status >= CL_COMPLETE && status <= CL_QUEUED && "invalid status");
+  assert(status >= CL_COMPLETE && status <= CL_QUEUED && "Invalid status");
 
   CallBackEntry* entry = new CallBackEntry(status, callback, data, blocking);
   if (entry == NULL) {
@@ -240,8 +240,8 @@ bool Event::awaitCompletion() {
       return false;
     }
 
-    ClPrint(LOG_DEBUG, LOG_WAIT, "Waiting for event %p to complete, current status %d", this,
-            status());
+    ClPrint(LOG_DETAIL_DEBUG, LOG_WAIT, "Waiting for event %p to complete, current status %d",
+            this, status());
     auto* queue = command().queue();
     if ((queue != nullptr) && queue->vdev()->ActiveWait()) {
       while (status() > CL_COMPLETE) {
@@ -255,7 +255,7 @@ bool Event::awaitCompletion() {
         lock_.wait();
       }
     }
-    ClPrint(LOG_DEBUG, LOG_WAIT, "Event %p wait completed", this);
+    ClPrint(LOG_DETAIL_DEBUG, LOG_WAIT, "Event %p wait completed", this);
   }
 
   return status() == CL_COMPLETE;
@@ -353,7 +353,7 @@ void Command::enqueue() {
     Agent::postEventCreate(as_cl(static_cast<Event*>(this)), type_);
   }
 
-  ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) enqueued: %p to queue: %p",
+  ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Command (%s) enqueued: %p to queue: %p",
           amd::activity_prof::getOclCommandKindString(this->type()), this, queue_);
 
   // Direct dispatch logic below will submit the command immediately, but the command status
diff --git a/projects/clr/rocclr/platform/commandqueue.cpp b/projects/clr/rocclr/platform/commandqueue.cpp
index 203b26d0d3..b965c9fad1 100644
--- a/projects/clr/rocclr/platform/commandqueue.cpp
+++ b/projects/clr/rocclr/platform/commandqueue.cpp
@@ -75,7 +75,7 @@ bool HostQueue::terminate() {
         if (GetSubmissionBatch() != nullptr) {
           auto command = new Marker(*this, false);
           if (command != nullptr) {
-            ClPrint(LOG_DEBUG, LOG_CMD, "Marker queued to ensure finish");
+            ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Marker queued to ensure finish");
             command->enqueue();
             lastCommand->release();
             lastCommand = command;
@@ -145,7 +145,7 @@ void HostQueue::finishCommand(Command* command) {
   if (command == nullptr) {
     command = getLastQueuedCommand(true);
     if (command != nullptr) {
-      ClPrint(LOG_DEBUG, LOG_CMD, "No command, awaiting complete status on host");
+      ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "No command, awaiting complete status on host");
       command->awaitCompletion();
       command->release();
     }
@@ -154,7 +154,7 @@ void HostQueue::finishCommand(Command* command) {
   // Check hardware event status for the specific command
   static constexpr bool kWaitCompletion = true;
   if (!device().IsHwEventReady(command->event(), kWaitCompletion)) {
-    ClPrint(LOG_DEBUG, LOG_CMD, "No HW event, awaiting complete status on host");
+    ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "No HW event, awaiting complete status on host");
     command->awaitCompletion();
   }
 }
@@ -181,7 +181,7 @@ void HostQueue::finish(bool cpu_wait) {
   }
 
   size_t batchSize = GetSubmissionBatchSize();
-  ClPrint(LOG_DEBUG, LOG_CMD,
+  ClPrint(LOG_DETAIL_DEBUG, LOG_CMD,
           "finish() called with batch size: %zu, cpu_wait: %d, "
           "fence dirty: %d",
           batchSize, cpu_wait, vdev()->isFenceDirty());
@@ -203,7 +203,7 @@ void HostQueue::finish(bool cpu_wait) {
   // Check HW status of the ROCcrl event. Note: not all ROCclr modes support HW status
   static constexpr bool kWaitCompletion = true;
   if (cpu_wait || !device().IsHwEventReady(command->event(), kWaitCompletion, GetSyncPolicy())) {
-    ClPrint(LOG_DEBUG, LOG_CMD,
+    ClPrint(LOG_DETAIL_DEBUG, LOG_CMD,
             "No HW event or batch size is less than %zu, "
             "await command completion",
             minBatchSize);
@@ -258,14 +258,14 @@ void HostQueue::loop(device::VirtualDevice* virtualDevice) {
     // Process the command's event wait list.
     const Command::EventWaitList& events = command->eventWaitList();
     bool dependencyFailed = false;
-    ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) processing: %p ,events.size(): %d",
+    ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Command (%s) processing: %p ,events.size(): %d",
             amd::activity_prof::getOclCommandKindString(command->type()), command, events.size());
     for (const auto& it : events) {
       // Only wait if the command is enqueued into another queue.
       if (it->command().queue() != this) {
         // Runtime has to flush the current batch only if the dependent wait is blocking
         if (it->command().status() != CL_COMPLETE) {
-          ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) %p awaiting event: %p",
+          ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Command (%s) %p awaiting event: %p",
                   amd::activity_prof::getOclCommandKindString(command->type()), command, it);
           virtualDevice->flush(head, true);
           tail = head = NULL;
@@ -287,7 +287,7 @@ void HostQueue::loop(device::VirtualDevice* virtualDevice) {
       continue;
     }
 
-    ClPrint(LOG_DEBUG, LOG_CMD, "Command (%s) submitted: %p",
+    ClPrint(LOG_DETAIL_DEBUG, LOG_CMD, "Command (%s) submitted: %p",
             amd::activity_prof::getOclCommandKindString(command->type()), command);
 
     command->setStatus(CL_SUBMITTED);
diff --git a/projects/clr/rocclr/platform/runtime.cpp b/projects/clr/rocclr/platform/runtime.cpp
index 52a6c5cfd6..4ad1852e34 100644
--- a/projects/clr/rocclr/platform/runtime.cpp
+++ b/projects/clr/rocclr/platform/runtime.cpp
@@ -80,7 +80,8 @@ bool Runtime::init() {
     ClPrint(LOG_ERROR, LOG_INIT, "Runtime initialization failed");
     return false;
   }
-  ClPrint(LOG_INFO, LOG_MISC, "ROCclr version: %s", ROCCLR_VERSION_GITHASH);
+
+  ClPrint(LOG_INFO, LOG_MISC && !amd::IS_HIP, "ROCclr version: %s", ROCCLR_VERSION_GITHASH);
 
   initialized_ = true;
   pid_ = amd::Os::getProcessId();
diff --git a/projects/clr/rocclr/utils/debug.hpp b/projects/clr/rocclr/utils/debug.hpp
index ad9e92eb6b..be5ffc4517 100644
--- a/projects/clr/rocclr/utils/debug.hpp
+++ b/projects/clr/rocclr/utils/debug.hpp
@@ -41,7 +41,8 @@ enum LogLevel {
   LOG_WARNING = 2,
   LOG_INFO = 3,
   LOG_DEBUG = 4,
-  LOG_EXTRA_DEBUG = 5
+  LOG_DETAIL_DEBUG = 5,
+  LOG_EXTRA_DEBUG = 6
 };
 
 enum LogMask {