From 5ea54a902aa22e513dc6697352ef3761ee5b9b2d Mon Sep 17 00:00:00 2001
From: foreman <dl.swbuild@amd.com>
Date: Wed, 8 May 2019 19:22:02 -0400
Subject: [PATCH] P4 to Git Change 1780358 by gandryey@gera-win10 on 2019/05/08
 18:46:22

	SWDEV-79445 - OCL generic changes and code clean-up
	- Run Google autoformat over the PAL backend. This will allow autoformat to be enabled in VS for future changes.
	- No functional changes

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palappprofile.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#29 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.cpp#12 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.cpp#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palcounters.hpp#10 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugger.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldebugmanager.cpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#52 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#133 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#37 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d10.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d11.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldeviced3d9.cpp#3 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevicegl.cpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palgpuopen.cpp#13 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palgpuopen.hpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.cpp#78 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palkernel.hpp#28 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.cpp#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprintf.hpp#6 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#93 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.hpp#38 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#73 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#27 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.cpp#79 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palsettings.hpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paltimestamp.hpp#4 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#132 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#60 edit


[ROCm/clr commit: 699a12bfa29aaecd187031e974b015c5176a356b]
---
 .../runtime/device/pal/palappprofile.cpp      |    5 +-
 .../runtime/device/pal/palappprofile.hpp      |    2 +-
 .../clr/rocclr/runtime/device/pal/palblit.cpp |  106 +-
 .../clr/rocclr/runtime/device/pal/palblit.hpp |    9 +-
 .../rocclr/runtime/device/pal/palconstbuf.cpp |   41 +-
 .../rocclr/runtime/device/pal/palconstbuf.hpp |   81 +-
 .../rocclr/runtime/device/pal/palcounters.cpp |   12 +-
 .../rocclr/runtime/device/pal/palcounters.hpp |    3 +-
 .../rocclr/runtime/device/pal/paldebugger.hpp |    8 +-
 .../runtime/device/pal/paldebugmanager.cpp    |    2 +-
 .../clr/rocclr/runtime/device/pal/paldefs.hpp |  174 +--
 .../rocclr/runtime/device/pal/paldevice.cpp   |  410 ++++---
 .../rocclr/runtime/device/pal/paldevice.hpp   |   98 +-
 .../runtime/device/pal/paldeviced3d10.cpp     |   18 +-
 .../runtime/device/pal/paldeviced3d11.cpp     |   18 +-
 .../runtime/device/pal/paldeviced3d9.cpp      |   18 +-
 .../rocclr/runtime/device/pal/paldevicegl.cpp | 1037 +++++++++--------
 .../rocclr/runtime/device/pal/palgpuopen.cpp  |  296 +++--
 .../rocclr/runtime/device/pal/palgpuopen.hpp  |  310 +++--
 .../rocclr/runtime/device/pal/palkernel.cpp   |   98 +-
 .../rocclr/runtime/device/pal/palkernel.hpp   |   36 +-
 .../rocclr/runtime/device/pal/palmemory.cpp   |   30 +-
 .../rocclr/runtime/device/pal/palmemory.hpp   |   32 +-
 .../rocclr/runtime/device/pal/palprintf.hpp   |   35 +-
 .../rocclr/runtime/device/pal/palprogram.cpp  |   69 +-
 .../rocclr/runtime/device/pal/palprogram.hpp  |   59 +-
 .../rocclr/runtime/device/pal/palresource.cpp |  348 +++---
 .../rocclr/runtime/device/pal/palresource.hpp |  140 ++-
 .../rocclr/runtime/device/pal/palsettings.cpp |   12 +-
 .../rocclr/runtime/device/pal/palsettings.hpp |  107 +-
 .../runtime/device/pal/paltimestamp.hpp       |    5 +-
 .../rocclr/runtime/device/pal/palvirtual.cpp  |  443 ++++---
 .../rocclr/runtime/device/pal/palvirtual.hpp  |  203 ++--
 33 files changed, 2119 insertions(+), 2146 deletions(-)

diff --git a/projects/clr/rocclr/runtime/device/pal/palappprofile.cpp b/projects/clr/rocclr/runtime/device/pal/palappprofile.cpp
index e703204719..8f804911a7 100644
--- a/projects/clr/rocclr/runtime/device/pal/palappprofile.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palappprofile.cpp
@@ -11,8 +11,9 @@ namespace pal {
 
 AppProfile::AppProfile()
     : amd::AppProfile(), enableHighPerformanceState_(true), reportAsOCL12Device_(false) {
-  propertyDataMap_.insert({"HighPerfState", PropertyData(DataType_Boolean, &enableHighPerformanceState_)});
+  propertyDataMap_.insert(
+      {"HighPerfState", PropertyData(DataType_Boolean, &enableHighPerformanceState_)});
 
   propertyDataMap_.insert({"OCL12Device", PropertyData(DataType_Boolean, &reportAsOCL12Device_)});
 }
-}
+}  // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/palappprofile.hpp b/projects/clr/rocclr/runtime/device/pal/palappprofile.hpp
index a337517cd6..3b7f3e441d 100644
--- a/projects/clr/rocclr/runtime/device/pal/palappprofile.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palappprofile.hpp
@@ -20,4 +20,4 @@ class AppProfile : public amd::AppProfile {
   bool enableHighPerformanceState_;
   bool reportAsOCL12Device_;
 };
-}
+}  // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/palblit.cpp b/projects/clr/rocclr/runtime/device/pal/palblit.cpp
index 524979ee97..4370f46317 100644
--- a/projects/clr/rocclr/runtime/device/pal/palblit.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palblit.cpp
@@ -280,8 +280,8 @@ bool DmaBlitManager::writeMemoryStaged(const void* srcHost, Memory& dstMemory, M
     amd::Coord3D copySize(tmpSize, 0, 0);
 
     // Copy data into the temporary buffer, using CPU
-    if (!xferBuf.hostWrite(&gpu(), reinterpret_cast<const char*>(srcHost) + offset,
-        src, copySize, flags)) {
+    if (!xferBuf.hostWrite(&gpu(), reinterpret_cast<const char*>(srcHost) + offset, src, copySize,
+                           flags)) {
       return false;
     }
 
@@ -296,7 +296,7 @@ bool DmaBlitManager::writeMemoryStaged(const void* srcHost, Memory& dstMemory, M
     srcOffset += tmpSize;
     if ((srcOffset + tmpSize) > gpu().xferWrite().MaxSize()) {
       srcOffset = 0;
-      flags =  0;
+      flags = 0;
     } else {
       flags = Resource::NoWait;
     }
@@ -310,7 +310,7 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
   // Use host copy if memory has direct access or it's persistent
   if (setup_.disableWriteBuffer_ ||
       (gpuMem(dstMemory).isHostMemDirectAccess() &&
-      (gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
+       (gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
       gpuMem(dstMemory).isPersistentDirectMap()) {
     return HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
   } else {
@@ -335,7 +335,7 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
       // Copy memory, using pinning
       while (dstSize > 0) {
         size_t tmpSize;
-          // If it's the first iterarion, then readjust the copy size
+        // If it's the first iterarion, then readjust the copy size
         // to include alignment
         if (first) {
           pinAllocSize = amd::alignUp(pinSize + partial, PinnedMemoryAlignment);
@@ -398,7 +398,7 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
   // Use host copy if memory has direct access or it's persistent
   if (setup_.disableWriteBufferRect_ ||
       (dstMemory.isHostMemDirectAccess() &&
-      (gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
+       (gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
       gpuMem(dstMemory).isPersistentDirectMap()) {
     return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
   } else {
@@ -586,8 +586,8 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory
                                                 entire, rowPitch, slicePitch);
   } else {
     // Use PAL path for a transfer
-    result = gpuMem(srcMemory).partialMemCopyTo(gpu(), srcOrigin, dstOrigin,
-        size, gpuMem(dstMemory));
+    result =
+        gpuMem(srcMemory).partialMemCopyTo(gpu(), srcOrigin, dstOrigin, size, gpuMem(dstMemory));
 
     // Check if a HostBlit transfer is required
     if (completeOperation_ && !result) {
@@ -947,8 +947,8 @@ static void setArgument(amd::Kernel* kernel, size_t index, size_t size, const vo
 
   void* param = kernel->parameters().values() + desc.offset_;
   assert((desc.type_ == T_POINTER || value != NULL ||
-    (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL)) &&
-    "not a valid local mem arg");
+          (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL)) &&
+         "not a valid local mem arg");
 
   uint32_t uint32_value = 0;
   uint64_t uint64_value = 0;
@@ -957,14 +957,15 @@ static void setArgument(amd::Kernel* kernel, size_t index, size_t size, const vo
   if (desc.type_ == T_POINTER && (desc.addressQualifier_ != CL_KERNEL_ARG_ADDRESS_LOCAL)) {
     if ((value == NULL) || (static_cast<const cl_mem*>(value) == NULL)) {
       reinterpret_cast<Memory**>(kernel->parameters().values() +
-        kernel->parameters().memoryObjOffset())[desc.info_.arrayIndex_] = nullptr;
+                                 kernel->parameters().memoryObjOffset())[desc.info_.arrayIndex_] =
+          nullptr;
     } else {
       // convert cl_mem to amd::Memory*, return false if invalid.
-      LP64_SWITCH(uint32_value, uint64_value) = static_cast<uintptr_t>((
-        *static_cast<Memory* const*>(value))->virtualAddress());
+      LP64_SWITCH(uint32_value, uint64_value) =
+          static_cast<uintptr_t>((*static_cast<Memory* const*>(value))->virtualAddress());
       reinterpret_cast<Memory**>(kernel->parameters().values() +
-        kernel->parameters().memoryObjOffset())[desc.info_.arrayIndex_] =
-        *static_cast<Memory* const*>(value);
+                                 kernel->parameters().memoryObjOffset())[desc.info_.arrayIndex_] =
+          *static_cast<Memory* const*>(value);
       // Note: Special case for image SRD, which is 64 bit always
       if (LP64_SWITCH(true, false) &&
           (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ImageObject)) {
@@ -1018,8 +1019,8 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
   bool releaseView = false;
   bool result = false;
   amd::Image::Format newFormat(gpuMem(dstMemory).desc().format_);
-  bool swapLayer = (dstView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) &&
-       dev().settings().gfx10Plus_;
+  bool swapLayer =
+      (dstView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && dev().settings().gfx10Plus_;
 
   // Find unsupported formats
   for (uint i = 0; i < RejectedFormatDataTotal; ++i) {
@@ -1078,10 +1079,10 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
     // Swap the Y and Z components, apparently gfx10 HW expects
     // layer in Z
     if (swapLayer) {
-        globalWorkSize[2] = globalWorkSize[1];
-        globalWorkSize[1] = 1;
-        localWorkSize[2] = localWorkSize[1];
-        localWorkSize[1] = 1;
+      globalWorkSize[2] = globalWorkSize[1];
+      globalWorkSize[1] = 1;
+      localWorkSize[2] = localWorkSize[1];
+      localWorkSize[1] = 1;
     }
   } else {
     globalWorkSize[0] = amd::alignUp(size[0], 8);
@@ -1114,10 +1115,10 @@ bool KernelBlitManager::copyBufferToImageKernel(device::Memory& srcMemory,
   cl_int copySize[4] = {(cl_int)size[0], (cl_int)size[1], (cl_int)size[2], 0};
 
   if (swapLayer) {
-      dstOrg[2] = dstOrg[1];
-      dstOrg[1] = 0;
-      copySize[2] = copySize[1];
-      copySize[1] = 1;
+    dstOrg[2] = dstOrg[1];
+    dstOrg[1] = 0;
+    copySize[2] = copySize[1];
+    copySize[1] = 1;
   }
 
   setArgument(kernels_[blitType], 3, sizeof(dstOrg), dstOrg);
@@ -1338,8 +1339,8 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
   bool releaseView = false;
   bool result = false;
   amd::Image::Format newFormat(gpuMem(srcMemory).desc().format_);
-  bool swapLayer = (srcView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) &&
-       dev().settings().gfx10Plus_;
+  bool swapLayer =
+      (srcView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && dev().settings().gfx10Plus_;
 
   // Find unsupported formats
   for (uint i = 0; i < RejectedFormatDataTotal; ++i) {
@@ -1398,10 +1399,10 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
     // Swap the Y and Z components, apparently gfx10 HW expects
     // layer in Z
     if (swapLayer) {
-        globalWorkSize[2] = globalWorkSize[1];
-        globalWorkSize[1] = 1;
-        localWorkSize[2] = localWorkSize[1];
-        localWorkSize[1] = 1;
+      globalWorkSize[2] = globalWorkSize[1];
+      globalWorkSize[1] = 1;
+      localWorkSize[2] = localWorkSize[1];
+      localWorkSize[1] = 1;
     }
   } else {
     globalWorkSize[0] = amd::alignUp(size[0], 8);
@@ -1426,10 +1427,10 @@ bool KernelBlitManager::copyImageToBufferKernel(device::Memory& srcMemory,
   cl_int srcOrg[4] = {(cl_int)srcOrigin[0], (cl_int)srcOrigin[1], (cl_int)srcOrigin[2], 0};
   cl_int copySize[4] = {(cl_int)size[0], (cl_int)size[1], (cl_int)size[2], 0};
   if (swapLayer) {
-      srcOrg[2] = srcOrg[1];
-      srcOrg[1] = 0;
-      copySize[2] = copySize[1];
-      copySize[1] = 1;
+    srcOrg[2] = srcOrg[1];
+    srcOrg[1] = 0;
+    copySize[2] = copySize[1];
+    copySize[1] = 1;
   }
   setArgument(kernels_[blitType], 4, sizeof(srcOrg), srcOrg);
   uint32_t memFmtSize = gpuMem(srcMemory).elementSize();
@@ -1570,7 +1571,7 @@ bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dst
   // Program source origin
   cl_int srcOrg[4] = {(cl_int)srcOrigin[0], (cl_int)srcOrigin[1], (cl_int)srcOrigin[2], 0};
   if ((gpuMem(srcMemory).desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) &&
-    dev().settings().gfx10Plus_) {
+      dev().settings().gfx10Plus_) {
     srcOrg[3] = 1;
   }
   setArgument(kernels_[blitType], 2, sizeof(srcOrg), srcOrg);
@@ -1578,7 +1579,7 @@ bool KernelBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dst
   // Program destinaiton origin
   cl_int dstOrg[4] = {(cl_int)dstOrigin[0], (cl_int)dstOrigin[1], (cl_int)dstOrigin[2], 0};
   if ((gpuMem(dstMemory).desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) &&
-    dev().settings().gfx10Plus_) {
+      dev().settings().gfx10Plus_) {
     dstOrg[3] = 1;
   }
   setArgument(kernels_[blitType], 3, sizeof(dstOrg), dstOrg);
@@ -1700,16 +1701,15 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
       amdMemory = pinHostMemory(srcHost, pinSize, partial);
       if (amdMemory == nullptr) {
         // Force SW copy
-        result = HostBlitManager::writeImage(srcHost, dstMemory,
-                    origin, size, rowPitch, slicePitch, entire);
+        result = HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
+                                             entire);
         synchronize();
         return result;
       }
       // Get device memory for this virtual device
       srcMemory = dev().getGpuMemory(amdMemory);
       pinned = true;
-    }
-    else {
+    } else {
       srcMemory = &gpu().xferWrite().Acquire(pinSize);
       srcMemory->hostWrite(&gpu(), srcHost, 0, pinSize, Resource::NoWait);
       pinned = false;
@@ -1951,7 +1951,7 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
   // Use host copy if memory has direct access or it's persistent
   if (setup_.disableWriteBuffer_ ||
       (gpuMem(dstMemory).isHostMemDirectAccess() &&
-      (gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
+       (gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
       (gpuMem(dstMemory).memoryType() == Resource::Persistent)) {
     result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
     synchronize();
@@ -2002,7 +2002,7 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
   // Use host copy if memory has direct access or it's persistent
   if (setup_.disableWriteBufferRect_ ||
       (gpuMem(dstMemory).isHostMemDirectAccess() &&
-      (gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
+       (gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
       gpuMem(dstMemory).isPersistentDirectMap()) {
     result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
     synchronize();
@@ -2206,8 +2206,8 @@ bool KernelBlitManager::fillImage(device::Memory& memory, const void* pattern,
   size_t localWorkSize[3];
   Memory* memView = &gpuMem(memory);
   amd::Image::Format newFormat(gpuMem(memory).owner()->asImage()->getImageFormat());
-  bool swapLayer = (memView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) &&
-       dev().settings().gfx10Plus_;
+  bool swapLayer =
+      (memView->desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && dev().settings().gfx10Plus_;
 
   // Program the kernels workload depending on the fill dimensions
   fillType = FillImage;
@@ -2274,10 +2274,10 @@ bool KernelBlitManager::fillImage(device::Memory& memory, const void* pattern,
     // Swap the Y and Z components, apparently gfx10 HW expects
     // layer in Z
     if (swapLayer) {
-        globalWorkSize[2] = globalWorkSize[1];
-        globalWorkSize[1] = 1;
-        localWorkSize[2] = localWorkSize[1];
-        localWorkSize[1] = 1;
+      globalWorkSize[2] = globalWorkSize[1];
+      globalWorkSize[1] = 1;
+      localWorkSize[2] = localWorkSize[1];
+      localWorkSize[1] = 1;
     }
   } else {
     globalWorkSize[0] = amd::alignUp(globalWorkSize[0], 8);
@@ -2297,10 +2297,10 @@ bool KernelBlitManager::fillImage(device::Memory& memory, const void* pattern,
   cl_int fillOrigin[4] = {(cl_int)origin[0], (cl_int)origin[1], (cl_int)origin[2], 0};
   cl_int fillSize[4] = {(cl_int)size[0], (cl_int)size[1], (cl_int)size[2], 0};
   if (swapLayer) {
-      fillOrigin[2] = fillOrigin[1];
-      fillOrigin[1] = 0;
-      fillSize[2] = fillSize[1];
-      fillSize[1] = 1;
+    fillOrigin[2] = fillOrigin[1];
+    fillOrigin[1] = 0;
+    fillSize[2] = fillSize[1];
+    fillSize[1] = 1;
   }
   setArgument(kernels_[fillType], 4, sizeof(fillOrigin), fillOrigin);
   setArgument(kernels_[fillType], 5, sizeof(fillSize), fillSize);
diff --git a/projects/clr/rocclr/runtime/device/pal/palblit.hpp b/projects/clr/rocclr/runtime/device/pal/palblit.hpp
index fe52ac2a59..4c9769d678 100644
--- a/projects/clr/rocclr/runtime/device/pal/palblit.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palblit.hpp
@@ -27,7 +27,7 @@ class DmaBlitManager : public device::HostBlitManager {
   //! Constructor
   DmaBlitManager(VirtualGPU& gpu,       //!< Virtual GPU to be used for blits
                  Setup setup = Setup()  //!< Specifies HW accelerated blits
-                 );
+  );
 
   //! Destructor
   virtual ~DmaBlitManager() {}
@@ -211,7 +211,7 @@ class KernelBlitManager : public DmaBlitManager {
   //! Constructor
   KernelBlitManager(VirtualGPU& gpu,       //!< Virtual GPU to be used for blits
                     Setup setup = Setup()  //!< Specifies HW accelerated blits
-                    );
+  );
 
   //! Destructor
   virtual ~KernelBlitManager();
@@ -382,7 +382,7 @@ class KernelBlitManager : public DmaBlitManager {
 
   //! Creates a program for all blit operations
   bool createProgram(Device& device  //!< Device object
-                     );
+  );
 
   //! Creates a view memory object
   Memory* createView(const Memory& parent,         //!< Parent memory object
@@ -409,4 +409,5 @@ static const char* BlitName[KernelBlitManager::BlitTotal] = {
     "fillImage",         "scheduler",
 };
 
-/*@}*/} // namespace pal
+/*@}*/  // namespace pal
+}  // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/palconstbuf.cpp b/projects/clr/rocclr/runtime/device/pal/palconstbuf.cpp
index ccd6dfb583..3bf5be1fd0 100644
--- a/projects/clr/rocclr/runtime/device/pal/palconstbuf.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palconstbuf.cpp
@@ -11,12 +11,12 @@ namespace pal {
 
 // ================================================================================================
 ManagedBuffer::ManagedBuffer(VirtualGPU& gpu, uint32_t size)
-  : gpu_(gpu)
-  , pool_(MaxNumberOfBuffers)
-  , activeBuffer_(0)
-  , size_(size)
-  , wrtOffset_(0)
-  , wrtAddress_(nullptr) {}
+    : gpu_(gpu),
+      pool_(MaxNumberOfBuffers),
+      activeBuffer_(0),
+      size_(size),
+      wrtOffset_(0),
+      wrtAddress_(nullptr) {}
 
 // ================================================================================================
 void ManagedBuffer::release() {
@@ -40,8 +40,8 @@ bool ManagedBuffer::create(Resource::MemoryType type) {
     pool_[i].buf->memRef()->gpu_ = &gpu_;
     void* wrtAddress = pool_[i].buf->map(&gpu_);
     if (wrtAddress == nullptr) {
-        LogPrintfError("We couldn't map HW constant buffer, size(%d)!", size_);
-        return false;
+      LogPrintfError("We couldn't map HW constant buffer, size(%d)!", size_);
+      return false;
     }
     // Make sure OCL touches every buffer in the queue to avoid delays on the first submit
     uint dummy = 0;
@@ -94,15 +94,10 @@ void ManagedBuffer::pinGpuEvent() {
 
 // ================================================================================================
 ConstantBuffer::ConstantBuffer(ManagedBuffer& mbuf, uint32_t size)
-  : mbuf_(mbuf)
-  , sys_mem_copy_(nullptr)
-  , size_(size)
-{}
+    : mbuf_(mbuf), sys_mem_copy_(nullptr), size_(size) {}
 
 // ================================================================================================
-ConstantBuffer::~ConstantBuffer() {
-  amd::AlignedMemory::deallocate(sys_mem_copy_);
-}
+ConstantBuffer::~ConstantBuffer() { amd::AlignedMemory::deallocate(sys_mem_copy_); }
 
 // ================================================================================================
 bool ConstantBuffer::Create() {
@@ -118,8 +113,8 @@ bool ConstantBuffer::Create() {
 
 // ================================================================================================
 uint64_t ConstantBuffer::UploadDataToHw(uint32_t size) const {
-  uint64_t  vm_address;
-  address   cpu_address = mbuf_.reserve(size, &vm_address);
+  uint64_t vm_address;
+  address cpu_address = mbuf_.reserve(size, &vm_address);
   // Update memory with new CB data
   memcpy(cpu_address, sys_mem_copy_, size);
   return vm_address;
@@ -127,8 +122,8 @@ uint64_t ConstantBuffer::UploadDataToHw(uint32_t size) const {
 
 // ================================================================================================
 uint64_t ConstantBuffer::UploadDataToHw(const void* sysmem, uint32_t size) const {
-  uint64_t  vm_address;
-  address   cpu_address = mbuf_.reserve(size, &vm_address);
+  uint64_t vm_address;
+  address cpu_address = mbuf_.reserve(size, &vm_address);
   // Update memory with new CB data
   memcpy(cpu_address, sysmem, size);
   return vm_address;
@@ -136,9 +131,7 @@ uint64_t ConstantBuffer::UploadDataToHw(const void* sysmem, uint32_t size) const
 
 // ================================================================================================
 XferBuffer::XferBuffer(const Device& device, ManagedBuffer& mbuf, uint32_t size)
-  : buffer_view_(device, size)
-  , mbuf_(mbuf)
-  , size_(size) {
+    : buffer_view_(device, size), mbuf_(mbuf), size_(size) {
   // Create a view for access
   Resource::ViewParams params = {};
   params.gpu_ = &mbuf_.gpu();
@@ -151,9 +144,9 @@ XferBuffer::XferBuffer(const Device& device, ManagedBuffer& mbuf, uint32_t size)
 
 // ================================================================================================
 Memory& XferBuffer::Acquire(uint32_t size) {
-  uint64_t  vm_address;
+  uint64_t vm_address;
   // Reserve space in the managed buffer
-  address   cpu_address = mbuf_.reserve(size, &vm_address);
+  address cpu_address = mbuf_.reserve(size, &vm_address);
   // Update a view for access
   buffer_view_.updateView(mbuf_.activeMemory(), vm_address - mbuf_.vmAddress(), size);
   return buffer_view_;
diff --git a/projects/clr/rocclr/runtime/device/pal/palconstbuf.hpp b/projects/clr/rocclr/runtime/device/pal/palconstbuf.hpp
index c1853b0537..5a2279eec5 100644
--- a/projects/clr/rocclr/runtime/device/pal/palconstbuf.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palconstbuf.hpp
@@ -12,9 +12,9 @@ namespace pal {
 class ManagedBuffer : public amd::EmbeddedObject {
  public:
   //! Constructor for the ConstBuffer class
-  ManagedBuffer(VirtualGPU& gpu,    //!< Virtual GPU device object
-                uint32_t    size    //!< size of the managed buffers in bytes
-                );
+  ManagedBuffer(VirtualGPU& gpu,  //!< Virtual GPU device object
+                uint32_t size     //!< size of the managed buffers in bytes
+  );
   ~ManagedBuffer() {}
 
   //! Creates the managed buffers
@@ -50,8 +50,8 @@ class ManagedBuffer : public amd::EmbeddedObject {
 
  private:
   struct TimeStampedBuffer {
-    Memory*   buf;
-    GpuEvent  events[AllEngines];
+    Memory* buf;
+    GpuEvent events[AllEngines];
   };
 
   //! The maximum number of the managed buffers
@@ -63,21 +63,21 @@ class ManagedBuffer : public amd::EmbeddedObject {
   //! Disable operator=
   ManagedBuffer& operator=(const ManagedBuffer&) = delete;
 
-  VirtualGPU& gpu_;                 //!< Virtual GPU object
-  std::vector<TimeStampedBuffer>  pool_;   //!< Buffers for management
-  uint32_t  activeBuffer_;          //!< Current active buffer
-  uint32_t  size_;                  //!< Constant buffer size
-  uint32_t  wrtOffset_;             //!< Current write offset
-  address   wrtAddress_;            //!< Write address in CB
+  VirtualGPU& gpu_;                      //!< Virtual GPU object
+  std::vector<TimeStampedBuffer> pool_;  //!< Buffers for management
+  uint32_t activeBuffer_;                //!< Current active buffer
+  uint32_t size_;                        //!< Constant buffer size
+  uint32_t wrtOffset_;                   //!< Current write offset
+  address wrtAddress_;                   //!< Write address in CB
 };
 
 //! Constant buffer
 class ConstantBuffer : public amd::HeapObject {
-public:
+ public:
   //! Constructor for the ConstBuffer class
   ConstantBuffer(ManagedBuffer& mbuf,  //!< Managed buffer
-                 uint32_t       size   //!< Max size of the constant buffer
-                 );
+                 uint32_t size         //!< Max size of the constant buffer
+  );
 
   //! Destructor for the ConstBuffer class
   ~ConstantBuffer();
@@ -86,18 +86,18 @@ public:
   bool Create();
 
   /*! \brief Uploads current constant buffer data from sysMemCopy_ to HW
-  *
-  *  \return GPU address for the uploaded data
-  */
+   *
+   *  \return GPU address for the uploaded data
+   */
   uint64_t UploadDataToHw(uint32_t size  //!< real data size for upload
                           ) const;
 
   /*! \brief Uploads current constant buffer data from sysMemCopy_ to HW
-  *
-  *  \return GPU address for the uploaded data
-  */
+   *
+   *  \return GPU address for the uploaded data
+   */
   uint64_t UploadDataToHw(const void* sysmem,  //!< Pointer to the data for upload
-                          uint32_t    size     //!< Real data size for upload
+                          uint32_t size        //!< Real data size for upload
                           ) const;
 
   //! Returns a pointer to the system memory copy for CB
@@ -106,52 +106,55 @@ public:
   //! Returns active GPU buffer
   Memory* ActiveMemory() const { return mbuf_.activeMemory(); }
 
-private:
+ private:
   //! Disable copy constructor
   ConstantBuffer(const ConstantBuffer&) = delete;
 
   //! Disable operator=
   ConstantBuffer& operator=(const ConstantBuffer&) = delete;
 
-  ManagedBuffer&  mbuf_;    //!< Managed buffer on GPU
-  address   sys_mem_copy_;  //!< System memory copy
-  uint32_t  size_;          //!< Constant buffer size
+  ManagedBuffer& mbuf_;   //!< Managed buffer on GPU
+  address sys_mem_copy_;  //!< System memory copy
+  uint32_t size_;         //!< Constant buffer size
 };
 
 //! Staging buffer
 class XferBuffer : public amd::EmbeddedObject {
-public:
+ public:
   //! Constructor for the ConstBuffer class
-  XferBuffer(const Device& device,  //!< Active GPU device 
+  XferBuffer(const Device& device,  //!< Active GPU device
              ManagedBuffer& mbuf,   //!< Managed buffer
-             uint32_t       size    //!< Maximum size of the transfer buffer
+             uint32_t size          //!< Maximum size of the transfer buffer
   );
 
   //! Destructor for the ConstBuffer class
   ~XferBuffer() {}
 
   /*! \brief Acquires free memory from the managed buffer
-  *
-  *  \return GPU memory object associated with free memory
-  */
-  Memory& Acquire(uint32_t size     //!< data size for transfers
-                  );
+   *
+   *  \return GPU memory object associated with free memory
+   */
+  Memory& Acquire(uint32_t size  //!< data size for transfers
+  );
 
   //! Releases memory object used in the staging transfer
   void Release(Memory& mem  //!< Memory object for release
-               ) { buffer_view_.updateView(nullptr, 0, 0); }
+  ) {
+    buffer_view_.updateView(nullptr, 0, 0);
+  }
 
   size_t MaxSize() const { return static_cast<size_t>(size_); }
 
-private:
+ private:
   //! Disable copy constructor
   XferBuffer(const XferBuffer&) = delete;
 
   //! Disable operator=
   XferBuffer& operator=(const XferBuffer&) = delete;
 
-  Memory  buffer_view_;     //!< Buffer view returned in the acquire
-  ManagedBuffer&  mbuf_;    //!< Managed buffer on GPU
-  uint32_t  size_;          //!< Mx staging buffer size
+  Memory buffer_view_;   //!< Buffer view returned in the acquire
+  ManagedBuffer& mbuf_;  //!< Managed buffer on GPU
+  uint32_t size_;        //!< Mx staging buffer size
 };
-/*@}*/} // namespace pal
+/*@}*/  // namespace pal
+}  // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/palcounters.cpp b/projects/clr/rocclr/runtime/device/pal/palcounters.cpp
index 2be9c3d50e..3af5ca0cf2 100644
--- a/projects/clr/rocclr/runtime/device/pal/palcounters.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palcounters.cpp
@@ -676,12 +676,12 @@ void PerfCounter::convertInfo() {
       break;
     case Pal::GfxIpLevel::GfxIp10:
     case Pal::GfxIpLevel::GfxIp10_1:
-        if (info_.blockIndex_ < gfx10BlockIdPal.size()) {
-            auto p = gfx10BlockIdPal[info_.blockIndex_];
-            info_.blockIndex_ = std::get<0>(p);
-            info_.counterIndex_ = std::get<1>(p);
-        }
-        break;
+      if (info_.blockIndex_ < gfx10BlockIdPal.size()) {
+        auto p = gfx10BlockIdPal[info_.blockIndex_];
+        info_.blockIndex_ = std::get<0>(p);
+        info_.counterIndex_ = std::get<1>(p);
+      }
+      break;
     default:
       Unimplemented();
       break;
diff --git a/projects/clr/rocclr/runtime/device/pal/palcounters.hpp b/projects/clr/rocclr/runtime/device/pal/palcounters.hpp
index ea55cc1600..4632c8b277 100644
--- a/projects/clr/rocclr/runtime/device/pal/palcounters.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palcounters.hpp
@@ -84,8 +84,7 @@ class PerfCounter : public device::PerfCounter {
               cl_uint blockIndex,           //!< HW block index
               cl_uint counterIndex,         //!< Counter index within the block
               cl_uint eventIndex)           //!< Event index for profiling
-      : gpuDevice_(device),
-        palRef_(palRef) {
+      : gpuDevice_(device), palRef_(palRef) {
     info_.blockIndex_ = blockIndex;
     info_.counterIndex_ = counterIndex;
     info_.eventIndex_ = eventIndex;
diff --git a/projects/clr/rocclr/runtime/device/pal/paldebugger.hpp b/projects/clr/rocclr/runtime/device/pal/paldebugger.hpp
index cb1d4dd981..70812b4028 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldebugger.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldebugger.hpp
@@ -98,10 +98,10 @@ struct HwDebugWaveAddr {
 };
 
 /*! \brief Kernel code information
-*
-*   This structure contains the pointer of mapped kernel code for host access
-*   and its size (in bytes)
-*/
+ *
+ *   This structure contains the pointer of mapped kernel code for host access
+ *   and its size (in bytes)
+ */
 struct AqlCodeInfo {
   amd_kernel_code_t* aqlCode_;  //! pointer of AQL code to allow host access
   uint32_t aqlCodeSize_;        //! size of AQL code
diff --git a/projects/clr/rocclr/runtime/device/pal/paldebugmanager.cpp b/projects/clr/rocclr/runtime/device/pal/paldebugmanager.cpp
index 124de40991..f8fdac9d0e 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldebugmanager.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldebugmanager.cpp
@@ -143,7 +143,7 @@ void GpuDebugManager::unregisterDebugger() {
 
 void GpuDebugManager::flushCache(uint32_t mask) {
   HwDbgGpuCacheMask cacheMask(mask);
-  //device()->xferQueue()->flushCuCaches(cacheMask);
+  // device()->xferQueue()->flushCuCaches(cacheMask);
 }
 
 
diff --git a/projects/clr/rocclr/runtime/device/pal/paldefs.hpp b/projects/clr/rocclr/runtime/device/pal/paldefs.hpp
index 989efc51d9..fdd8213cee 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldefs.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldefs.hpp
@@ -47,9 +47,9 @@ struct GpuEvent {
   static constexpr uint32_t InvalidID = ((1 << 30) - 1);
 
   struct {
-    uint32_t id_ : 30;        ///< Actual event id
-    uint32_t modified_ : 1;   ///< Resource associated with the event was modified
-    uint32_t engineId_ : 1;   ///< Type of the id
+    uint32_t id_ : 30;       ///< Actual event id
+    uint32_t modified_ : 1;  ///< Resource associated with the event was modified
+    uint32_t engineId_ : 1;  ///< Type of the id
   };
   //! GPU event default constructor
   GpuEvent() : id_(InvalidID), modified_(false), engineId_(MainEngine) {}
@@ -63,8 +63,11 @@ struct GpuEvent {
   void invalidate() { id_ = InvalidID; }
 
   // Overwrite default assign operator to preserve modified_ field
-  GpuEvent& operator=(const GpuEvent& evt)
-    { id_ = evt.id_; engineId_ = evt.engineId_; return *this; }
+  GpuEvent& operator=(const GpuEvent& evt) {
+    id_ = evt.id_;
+    engineId_ = evt.engineId_;
+    return *this;
+  }
 };
 
 /*! \addtogroup PAL
@@ -113,87 +116,110 @@ const static uint HsaSamplerObjectAlignment = 16;
 const static uint DeviceQueueMaskSize = 32;
 
 struct AMDDeviceInfo {
-  const char* targetName_;     //!< Target name
-  const char* machineTarget_;  //!< Machine target
-  const char* machineTargetLC_;//!< Machine target for LC
-  uint simdPerCU_;             //!< Number of SIMDs per CU
-  uint simdWidth_;             //!< Number of workitems processed per SIMD
-  uint simdInstructionWidth_;  //!< Number of instructions processed per SIMD
-  uint memChannelBankWidth_;   //!< Memory channel bank width
-  uint localMemSizePerCU_;     //!< Local memory size per CU
-  uint localMemBanks_;         //!< Number of banks of local memory
-  uint gfxipVersionLC_;        //!< The core engine GFXIP version for LC
-  uint gfxipVersion_;          //!< The core engine GFXIP version
-  bool xnackEnabled_;          //!< Enable XNACK feature
+  const char* targetName_;       //!< Target name
+  const char* machineTarget_;    //!< Machine target
+  const char* machineTargetLC_;  //!< Machine target for LC
+  uint simdPerCU_;               //!< Number of SIMDs per CU
+  uint simdWidth_;               //!< Number of workitems processed per SIMD
+  uint simdInstructionWidth_;    //!< Number of instructions processed per SIMD
+  uint memChannelBankWidth_;     //!< Memory channel bank width
+  uint localMemSizePerCU_;       //!< Local memory size per CU
+  uint localMemBanks_;           //!< Number of banks of local memory
+  uint gfxipVersionLC_;          //!< The core engine GFXIP version for LC
+  uint gfxipVersion_;            //!< The core engine GFXIP version
+  bool xnackEnabled_;            //!< Enable XNACK feature
 };
 
 static const AMDDeviceInfo DeviceInfo[] = {
-  /* Unknown */ {"", "unknown", "", 4, 16, 1, 256, 64 * Ki, 32, 0, 0, false},
-  /* Tahiti */ {"", "tahiti", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
-  /* Pitcairn */ {"", "pitcairn", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
-  /* Capeverde */ {"", "bonaire", "", 4, 16, 1, 256, 64 * Ki, 32, 700, 700, false},
-  /* Oland */ {"", "oland", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
-  /* Hainan */ {"", "hainan", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
+    /* Unknown */ {"", "unknown", "", 4, 16, 1, 256, 64 * Ki, 32, 0, 0, false},
+    /* Tahiti */ {"", "tahiti", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
+    /* Pitcairn */ {"", "pitcairn", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
+    /* Capeverde */ {"", "bonaire", "", 4, 16, 1, 256, 64 * Ki, 32, 700, 700, false},
+    /* Oland */ {"", "oland", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
+    /* Hainan */ {"", "hainan", "", 4, 16, 1, 256, 64 * Ki, 32, 600, 600, false},
 
-  /* Bonaire */ {"Bonaire", "bonaire", "", 4, 16, 1, 256, 64 * Ki, 32, 700, 700, false},
-  /* Hawaii */ {"Hawaii", "hawaii", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
-  /* Hawaii */ {"", "grenada", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
-  /* Hawaii */ {"", "maui", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
+    /* Bonaire */ {"Bonaire", "bonaire", "", 4, 16, 1, 256, 64 * Ki, 32, 700, 700, false},
+    /* Hawaii */ {"Hawaii", "hawaii", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
+    /* Hawaii */ {"", "grenada", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
+    /* Hawaii */ {"", "maui", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
 
-  /* Kalindi */ {"Kalindi", "kalindi", "", 4, 16, 1, 256, 64 * Ki, 32, 702, 702, false},
-  /* Godavari */ {"Mullins", "mullins", "", 4, 16, 1, 256, 64 * Ki, 32, 702, 702, false},
-  /* Spectre */ {"Spectre", "spectre", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
-  /* Spooky */ {"Spooky", "spooky", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
+    /* Kalindi */ {"Kalindi", "kalindi", "", 4, 16, 1, 256, 64 * Ki, 32, 702, 702, false},
+    /* Godavari */ {"Mullins", "mullins", "", 4, 16, 1, 256, 64 * Ki, 32, 702, 702, false},
+    /* Spectre */ {"Spectre", "spectre", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
+    /* Spooky */ {"Spooky", "spooky", "", 4, 16, 1, 256, 64 * Ki, 32, 701, 701, false},
 
-  /* Carrizo */ {"Carrizo", "carrizo", "", 4, 16, 1, 256, 64 * Ki, 32, 801, 801,false},
-  /* Bristol */ {"Bristol Ridge", "carrizo", "", 4, 16, 1, 256, 64 * Ki, 32, 801, 801, false},
-  /* Stoney */ {"Stoney", "stoney", "", 4, 16, 1, 256, 64 * Ki, 32, 810, 810, false},
+    /* Carrizo */ {"Carrizo", "carrizo", "", 4, 16, 1, 256, 64 * Ki, 32, 801, 801, false},
+    /* Bristol */ {"Bristol Ridge", "carrizo", "", 4, 16, 1, 256, 64 * Ki, 32, 801, 801, false},
+    /* Stoney */ {"Stoney", "stoney", "", 4, 16, 1, 256, 64 * Ki, 32, 810, 810, false},
 
-  /* Iceland */ {"Iceland", "iceland", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 802, 800, false},
-  /* Tonga */ {"Tonga", "tonga", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 802, 800, false},
-  /* Fiji */ {"Fiji", "fiji", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
-  /* Ellesmere */ {"Ellesmere", "ellesmere", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
-  /* Baffin */ {"Baffin", "baffin", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
-  /* Lexa */ {"gfx804", "gfx804", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
+    /* Iceland */ {"Iceland", "iceland", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 802, 800, false},
+    /* Tonga */ {"Tonga", "tonga", "gfx802", 4, 16, 1, 256, 64 * Ki, 32, 802, 800, false},
+    /* Fiji */ {"Fiji", "fiji", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
+    /* Ellesmere */
+    {"Ellesmere", "ellesmere", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
+    /* Baffin */ {"Baffin", "baffin", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
+    /* Lexa */ {"gfx804", "gfx804", "gfx803", 4, 16, 1, 256, 64 * Ki, 32, 803, 804, false},
 };
 
 // Ordering as per AsicRevision# in //depot/stg/pal/inc/core/palDevice.h and
 // http://confluence.amd.com/pages/viewpage.action?spaceKey=ASLC&title=AMDGPU+Target+Names
 static const AMDDeviceInfo Gfx9PlusSubDeviceInfo[] = {
-    /* Vega10       */{"gfx900", "gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 900, 900, false},
-    /* Vega10 XNACK */{ LIGHTNING_SWITCH("gfx900","gfx901"), "gfx901", "gfx900",
-                        4, 16, 1, 256, 64 * Ki, 32,  900, 901, true},
-    /* Vega12       */{"gfx904", "gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 904, 904, false},
-    /* Vega12 XNACK */{ LIGHTNING_SWITCH("gfx904","gfx905"), "gfx905", "gfx904",
-                        4, 16, 1, 256, 64 * Ki, 32, 904, 905, true},
-    /* Vega20       */{"gfx906", "gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 906, 906, false},
-    /* Vega20 XNACK */{ LIGHTNING_SWITCH("gfx906","gfx907"), "gfx907", "gfx906",
-                        4, 16, 1, 256, 64 * Ki, 32, 906, 907, true},
-    /* Raven        */{"gfx902", "gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 902, false},
-    /* Raven XNACK  */{ LIGHTNING_SWITCH("gfx902","gfx903"), "gfx903", "gfx902",
-                        4, 16, 1, 256, 64 * Ki, 32, 902, 903, true},
-    /* Raven2       */{"gfx902", "gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 902, false},
-    /* Raven2 XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), "gfx903", "gfx902",
-                        4, 16, 1, 256, 64 * Ki, 32, 902, 903, true},
-    /* Renoir       */{"gfx902", "gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 902, false},
-    /* Renoir XNACK */{ LIGHTNING_SWITCH("gfx902","gfx903"), "gfx903", "gfx902",
-                        4, 16, 1, 256, 64 * Ki, 32, 902, 903, true},
-    /* Navi10_A0       */{ "gfx1010", "gfx1010", "gfx1010", 2, 32, 1, 256, 64 * Ki, 32, 1010, 1010, false },
-    /* Navi10_A0 XNACK */{ "gfx1010", "gfx1010", "gfx1010", 2, 32, 1, 256, 64 * Ki, 32, 1010, 1010, true },
-    /* Navi10       */{"gfx1010", "gfx1010", "gfx1010", 2, 32, 1, 256, 64 * Ki, 32, 1010, 1010, false},
-    /* Navi10 XNACK */{"gfx1010", "gfx1010", "gfx1010", 2, 32, 1, 256, 64 * Ki, 32, 1010, 1010, true},
-    /* Navi10Lite       */{"gfx1000", "gfx1000","gfx1000", 2, 32, 1, 256, 64 * Ki, 32, 1000, 1000, false},
-    /* Navi10Lite XNACK */{"gfx1000", "gfx1000", "gfx1000", 2, 32, 1, 256, 64 * Ki, 32, 1000, 1000, true},
-    /* Navi12       */{ "gfx1011", "gfx1011", "gfx1011", 2, 32, 1, 256, 64 * Ki, 32, 1011, 1011, false },
-    /* Navi12 XNACK */{ "gfx1011", "gfx1011", "gfx1011", 2, 32, 1, 256, 64 * Ki, 32, 1011, 1011, true },
-    /* Navi12Lite   */{ "gfx1011", "gfx1011", "gfx1011", 2, 32, 1, 256, 64 * Ki, 32, 1011, 1011, false },
-    /* Navi12Lite XNACK */{ "gfx1011", "gfx1011", "gfx1011", 2, 32, 1, 256, 64 * Ki, 32, 1011, 1011, true },
-    /* Navi14       */{ "gfx1012", "gfx1012", "gfx1012", 2, 32, 1, 256, 64 * Ki, 32, 1012, 1012, false },
-    /* Navi14 XNACK */{ "gfx1012", "gfx1012", "gfx1012", 2, 32, 1, 256, 64 * Ki, 32, 1012, 1012, true },
-    /* UnknownDevice3       */{ "gfx1030", "gfx1030", "gfx1030", 2, 32, 1, 256, 64 * Ki, 32, 1030, 1030, false },
-    /* UnknownDevice3 XNACK */{ "gfx1030", "gfx1030", "gfx1030", 2, 32, 1, 256, 64 * Ki, 32, 1030, 1030, true },
-    /* UnknownDevice2   */{ "gfx1030", "gfx1030", "gfx1030", 2, 32, 1, 256, 64 * Ki, 32, 1030, 1030, false },
-    /* UnknownDevice2 XNACK */{ "gfx1030", "gfx1030", "gfx1030", 2, 32, 1, 256, 64 * Ki, 32, 1030, 1030, true },
+    /* Vega10       */ {"gfx900", "gfx900", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 900, 900, false},
+    /* Vega10 XNACK */
+    {LIGHTNING_SWITCH("gfx900", "gfx901"), "gfx901", "gfx900", 4, 16, 1, 256, 64 * Ki, 32, 900, 901,
+     true},
+    /* Vega12       */ {"gfx904", "gfx904", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 904, 904, false},
+    /* Vega12 XNACK */
+    {LIGHTNING_SWITCH("gfx904", "gfx905"), "gfx905", "gfx904", 4, 16, 1, 256, 64 * Ki, 32, 904, 905,
+     true},
+    /* Vega20       */ {"gfx906", "gfx906", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 906, 906, false},
+    /* Vega20 XNACK */
+    {LIGHTNING_SWITCH("gfx906", "gfx907"), "gfx907", "gfx906", 4, 16, 1, 256, 64 * Ki, 32, 906, 907,
+     true},
+    /* Raven        */ {"gfx902", "gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 902, false},
+    /* Raven XNACK  */
+    {LIGHTNING_SWITCH("gfx902", "gfx903"), "gfx903", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 903,
+     true},
+    /* Raven2       */ {"gfx902", "gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 902, false},
+    /* Raven2 XNACK */
+    {LIGHTNING_SWITCH("gfx902", "gfx903"), "gfx903", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 903,
+     true},
+    /* Renoir       */ {"gfx902", "gfx902", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 902, false},
+    /* Renoir XNACK */
+    {LIGHTNING_SWITCH("gfx902", "gfx903"), "gfx903", "gfx902", 4, 16, 1, 256, 64 * Ki, 32, 902, 903,
+     true},
+    /* Navi10_A0       */
+    {"gfx1010", "gfx1010", "gfx1010", 2, 32, 1, 256, 64 * Ki, 32, 1010, 1010, false},
+    /* Navi10_A0 XNACK */
+    {"gfx1010", "gfx1010", "gfx1010", 2, 32, 1, 256, 64 * Ki, 32, 1010, 1010, true},
+    /* Navi10       */
+    {"gfx1010", "gfx1010", "gfx1010", 2, 32, 1, 256, 64 * Ki, 32, 1010, 1010, false},
+    /* Navi10 XNACK */
+    {"gfx1010", "gfx1010", "gfx1010", 2, 32, 1, 256, 64 * Ki, 32, 1010, 1010, true},
+    /* Navi10Lite       */
+    {"gfx1000", "gfx1000", "gfx1000", 2, 32, 1, 256, 64 * Ki, 32, 1000, 1000, false},
+    /* Navi10Lite XNACK */
+    {"gfx1000", "gfx1000", "gfx1000", 2, 32, 1, 256, 64 * Ki, 32, 1000, 1000, true},
+    /* Navi12       */
+    {"gfx1011", "gfx1011", "gfx1011", 2, 32, 1, 256, 64 * Ki, 32, 1011, 1011, false},
+    /* Navi12 XNACK */
+    {"gfx1011", "gfx1011", "gfx1011", 2, 32, 1, 256, 64 * Ki, 32, 1011, 1011, true},
+    /* Navi12Lite   */
+    {"gfx1011", "gfx1011", "gfx1011", 2, 32, 1, 256, 64 * Ki, 32, 1011, 1011, false},
+    /* Navi12Lite XNACK */
+    {"gfx1011", "gfx1011", "gfx1011", 2, 32, 1, 256, 64 * Ki, 32, 1011, 1011, true},
+    /* Navi14       */
+    {"gfx1012", "gfx1012", "gfx1012", 2, 32, 1, 256, 64 * Ki, 32, 1012, 1012, false},
+    /* Navi14 XNACK */
+    {"gfx1012", "gfx1012", "gfx1012", 2, 32, 1, 256, 64 * Ki, 32, 1012, 1012, true},
+    /* UnknownDevice3       */
+    {"gfx1030", "gfx1030", "gfx1030", 2, 32, 1, 256, 64 * Ki, 32, 1030, 1030, false},
+    /* UnknownDevice3 XNACK */
+    {"gfx1030", "gfx1030", "gfx1030", 2, 32, 1, 256, 64 * Ki, 32, 1030, 1030, true},
+    /* UnknownDevice2   */
+    {"gfx1030", "gfx1030", "gfx1030", 2, 32, 1, 256, 64 * Ki, 32, 1030, 1030, false},
+    /* UnknownDevice2 XNACK */
+    {"gfx1030", "gfx1030", "gfx1030", 2, 32, 1, 256, 64 * Ki, 32, 1030, 1030, true},
 
 };
 
diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
index c34f2ab003..4132b19f78 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
@@ -53,15 +53,14 @@ void PalDeviceUnload() { pal::Device::tearDown(); }
 
 namespace pal {
 
-Util::GenericAllocator  NullDevice::allocator_;
+Util::GenericAllocator NullDevice::allocator_;
 char* Device::platformObj_;
-Pal::IPlatform*  Device::platform_;
+Pal::IPlatform* Device::platform_;
 
 NullDevice::Compiler* NullDevice::compiler_;
 AppProfile Device::appProfile_;
 
-NullDevice::NullDevice()
-    : amd::Device(), ipLevel_(Pal::GfxIpLevel::None), hwInfo_(nullptr) {}
+NullDevice::NullDevice() : amd::Device(), ipLevel_(Pal::GfxIpLevel::None), hwInfo_(nullptr) {}
 
 bool NullDevice::init() {
   std::vector<Device*> devices;
@@ -89,8 +88,8 @@ bool NullDevice::init() {
       driverVersion = static_cast<amd::Device*>(devices[i])->info().driverVersion_;
       if (driverVersion.find("PAL") != std::string::npos) {
         if (static_cast<NullDevice*>(devices[i])->asicRevision() == revision) {
-            foundActive = true;
-            break;
+          foundActive = true;
+          break;
         }
       }
     }
@@ -109,132 +108,130 @@ bool NullDevice::init() {
       }
     }
   }
-#endif // defined(WITH_COMPILER_LIB)
+#endif  // defined(WITH_COMPILER_LIB)
 
   // Loop through all supported devices and create each of them
-  for (uint id = 0;
-        id < sizeof(Gfx9PlusSubDeviceInfo)/sizeof(AMDDeviceInfo); ++id) {
-      bool foundActive = false;
-      bool foundDuplicate = false;
-      uint gfxipVersion = IS_LIGHTNING ? pal::Gfx9PlusSubDeviceInfo[id].gfxipVersionLC_ :
-        pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_;
+  for (uint id = 0; id < sizeof(Gfx9PlusSubDeviceInfo) / sizeof(AMDDeviceInfo); ++id) {
+    bool foundActive = false;
+    bool foundDuplicate = false;
+    uint gfxipVersion = IS_LIGHTNING ? pal::Gfx9PlusSubDeviceInfo[id].gfxipVersionLC_
+                                     : pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_;
 
-      if (pal::Gfx9PlusSubDeviceInfo[id].targetName_[0] == '\0') {
-          continue;
-      }
+    if (pal::Gfx9PlusSubDeviceInfo[id].targetName_[0] == '\0') {
+      continue;
+    }
 
-      // Loop through all active PAL devices and see if we match one
-      for (uint i = 0; i < devices.size(); ++i) {
-        driverVersion = static_cast<amd::Device*>(devices[i])->info().driverVersion_;
-        if (driverVersion.find("PAL") != std::string::npos) {
-          gfxipVersion = devices[i]->settings().useLightning_ ?
-            pal::Gfx9PlusSubDeviceInfo[id].gfxipVersionLC_ :
-            pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_;
-          uint gfxIpCurrent = devices[i]->settings().useLightning_ ?
-            static_cast<NullDevice*>(devices[i])->hwInfo()->gfxipVersionLC_ :
-            static_cast<NullDevice*>(devices[i])->hwInfo()->gfxipVersion_;
-          if (gfxIpCurrent == gfxipVersion) {
-              foundActive = true;
-              break;
-          }
+    // Loop through all active PAL devices and see if we match one
+    for (uint i = 0; i < devices.size(); ++i) {
+      driverVersion = static_cast<amd::Device*>(devices[i])->info().driverVersion_;
+      if (driverVersion.find("PAL") != std::string::npos) {
+        gfxipVersion = devices[i]->settings().useLightning_
+            ? pal::Gfx9PlusSubDeviceInfo[id].gfxipVersionLC_
+            : pal::Gfx9PlusSubDeviceInfo[id].gfxipVersion_;
+        uint gfxIpCurrent = devices[i]->settings().useLightning_
+            ? static_cast<NullDevice*>(devices[i])->hwInfo()->gfxipVersionLC_
+            : static_cast<NullDevice*>(devices[i])->hwInfo()->gfxipVersion_;
+        if (gfxIpCurrent == gfxipVersion) {
+          foundActive = true;
+          break;
         }
       }
+    }
 
-      // Don't report an offline device if it's active
-      if (foundActive) {
-          continue;
+    // Don't report an offline device if it's active
+    if (foundActive) {
+      continue;
+    }
+
+    // Loop through all previous devices in the Gfx9PlusSubDeviceInfo list
+    // and compare them with the current entry to see if the current entry
+    // was listed previously in the Gfx9PlusSubDeviceInfo, if so, then it
+    // means the current entry already has been added in the offline device list
+    for (uint j = 0; j < id; ++j) {
+      if (pal::Gfx9PlusSubDeviceInfo[j].targetName_[0] == '\0') {
+        continue;
       }
-
-      // Loop through all previous devices in the Gfx9PlusSubDeviceInfo list
-      // and compare them with the current entry to see if the current entry
-      // was listed previously in the Gfx9PlusSubDeviceInfo, if so, then it
-      // means the current entry already has been added in the offline device list
-      for (uint j = 0; j < id; ++j) {
-          if (pal::Gfx9PlusSubDeviceInfo[j].targetName_[0] == '\0') {
-              continue;
-          }
-          if (strcmp(pal::Gfx9PlusSubDeviceInfo[j].targetName_,
-                     pal::Gfx9PlusSubDeviceInfo[id].targetName_) == 0) {
-              foundDuplicate = true;
-              break;
-          }
+      if (strcmp(pal::Gfx9PlusSubDeviceInfo[j].targetName_,
+                 pal::Gfx9PlusSubDeviceInfo[id].targetName_) == 0) {
+        foundDuplicate = true;
+        break;
       }
+    }
 
-      // Don't report an offline device twice
-      if (foundDuplicate) {
-          continue;
-      }
+    // Don't report an offline device twice
+    if (foundDuplicate) {
+      continue;
+    }
 
-      Pal::GfxIpLevel ipLevel = Pal::GfxIpLevel::_None;
-      uint ipLevelMajor = round(gfxipVersion / 100);
-      uint ipLevelMinor = round(gfxipVersion / 10 % 10);
-      switch (ipLevelMajor) {
+    Pal::GfxIpLevel ipLevel = Pal::GfxIpLevel::_None;
+    uint ipLevelMajor = round(gfxipVersion / 100);
+    uint ipLevelMinor = round(gfxipVersion / 10 % 10);
+    switch (ipLevelMajor) {
       case 9:
-          ipLevel = Pal::GfxIpLevel::GfxIp9;
-          break;
+        ipLevel = Pal::GfxIpLevel::GfxIp9;
+        break;
       case 10:
         switch (ipLevelMinor) {
-        case 0:
-          ipLevel = Pal::GfxIpLevel::GfxIp10;
-          break;
-        case 1:
-          ipLevel = Pal::GfxIpLevel::GfxIp10_1;
-          break;
-        case 2:
-          ipLevel = Pal::GfxIpLevel::GfxIp10_2;
-          break;
-        case 3:
-          ipLevel = Pal::GfxIpLevel::GfxIp10_3;
-          break;
+          case 0:
+            ipLevel = Pal::GfxIpLevel::GfxIp10;
+            break;
+          case 1:
+            ipLevel = Pal::GfxIpLevel::GfxIp10_1;
+            break;
+          case 2:
+            ipLevel = Pal::GfxIpLevel::GfxIp10_2;
+            break;
+          case 3:
+            ipLevel = Pal::GfxIpLevel::GfxIp10_3;
+            break;
         }
-      }
+    }
 
-      Pal::AsicRevision revision = Pal::AsicRevision::Unknown;
-      uint xNACKSupported = pal::Gfx9PlusSubDeviceInfo[id].xnackEnabled_ ? 1 : 0;
+    Pal::AsicRevision revision = Pal::AsicRevision::Unknown;
+    uint xNACKSupported = pal::Gfx9PlusSubDeviceInfo[id].xnackEnabled_ ? 1 : 0;
 
-      switch (gfxipVersion) {
+    switch (gfxipVersion) {
       case 901:
       case 900:
-          revision = Pal::AsicRevision::Vega10;
-          break;
+        revision = Pal::AsicRevision::Vega10;
+        break;
       case 903:
       case 902:
-          revision = Pal::AsicRevision::Raven;
-          break;
+        revision = Pal::AsicRevision::Raven;
+        break;
       case 905:
       case 904:
-          revision = Pal::AsicRevision::Vega12;
-          break;
+        revision = Pal::AsicRevision::Vega12;
+        break;
       case 907:
       case 906:
-          revision = Pal::AsicRevision::Vega20;
-          break;
+        revision = Pal::AsicRevision::Vega20;
+        break;
       case 1000:
-          revision = Pal::AsicRevision::Navi10Lite;
-          break;
+        revision = Pal::AsicRevision::Navi10Lite;
+        break;
       case 1010:
-          revision = Pal::AsicRevision::Navi10;
-          break;
+        revision = Pal::AsicRevision::Navi10;
+        break;
       case 1011:
-          revision = Pal::AsicRevision::Navi12;
-          break;
+        revision = Pal::AsicRevision::Navi12;
+        break;
       case 1012:
-          revision = Pal::AsicRevision::Navi14;
-          break;
+        revision = Pal::AsicRevision::Navi14;
+        break;
       case 1030:
-          ShouldNotReachHere();
-          break;
-      }
+        ShouldNotReachHere();
+        break;
+    }
 
-      NullDevice* dev = new NullDevice();
-      if (nullptr != dev) {
-          if (!dev->create(revision, ipLevel, xNACKSupported)) {
-              delete dev;
-          }
-          else {
-              dev->registerDevice();
-          }
+    NullDevice* dev = new NullDevice();
+    if (nullptr != dev) {
+      if (!dev->create(revision, ipLevel, xNACKSupported)) {
+        delete dev;
+      } else {
+        dev->registerDevice();
       }
+    }
   }
 
   return true;
@@ -257,10 +254,10 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
   if ((GPU_ENABLE_PAL == 1) && (ipLevel == Pal::GfxIpLevel::_None)) {
     hwInfo_ = &DeviceInfo[static_cast<uint>(asicRevision)];
   } else if (ipLevel >= Pal::GfxIpLevel::GfxIp9) {
-      subtarget = (static_cast<uint>(asicRevision_) %
-                   static_cast<uint>(Pal::AsicRevision::Vega10))
-                   << 1 | xNACKSupported;
-      hwInfo_ = &Gfx9PlusSubDeviceInfo[subtarget];
+    subtarget = (static_cast<uint>(asicRevision_) % static_cast<uint>(Pal::AsicRevision::Vega10))
+            << 1 |
+        xNACKSupported;
+    hwInfo_ = &Gfx9PlusSubDeviceInfo[subtarget];
 
   } else {
     return false;
@@ -271,8 +268,7 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
 
   // Report 512MB for all offline devices
   Pal::GpuMemoryHeapProperties heaps[Pal::GpuHeapCount];
-  heaps[Pal::GpuHeapLocal].heapSize =
-  heaps[Pal::GpuHeapLocal].physicalHeapSize = 512 * Mi;
+  heaps[Pal::GpuHeapLocal].heapSize = heaps[Pal::GpuHeapLocal].physicalHeapSize = 512 * Mi;
 
   Pal::WorkStationCaps wscaps = {};
 
@@ -295,7 +291,7 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
   info_.wavefrontWidth_ = settings().enableWave32Mode_ ? 32 : 64;
 
   if (settings().useLightning_) {
-#if defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY)
+#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
     //  create compilation object with cache support
     int gfxipMajor = hwInfo_->gfxipVersionLC_ / 100;
     int gfxipMinor = hwInfo_->gfxipVersionLC_ / 10 % 10;
@@ -323,16 +319,16 @@ bool NullDevice::create(Pal::AsicRevision asicRevision, Pal::GfxIpLevel ipLevel,
     cacheCompilation_.reset(compObj);
 #endif
   } else {
-#if  defined(WITH_COMPILER_LIB)
+#if defined(WITH_COMPILER_LIB)
     const char* library = getenv("HSA_COMPILER_LIBRARY");
-    aclCompilerOptions opts = { sizeof(aclCompilerOptions_0_8),
-      library,
-      nullptr,
-      nullptr,
-      nullptr,
-      nullptr,
-      nullptr,
-      AMD_OCL_SC_LIB };
+    aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8),
+                               library,
+                               nullptr,
+                               nullptr,
+                               nullptr,
+                               nullptr,
+                               nullptr,
+                               AMD_OCL_SC_LIB};
     // Initialize the compiler handle
     acl_error error;
     compiler_ = aclCompilerInit(&opts, &error);
@@ -370,9 +366,9 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
 
   info_.maxWorkItemDimensions_ = 3;
 
-  info_.maxComputeUnits_ = settings().enableWgpMode_ ?
-    palProp.gfxipProperties.shaderCore.numAvailableCus / 2 :
-    palProp.gfxipProperties.shaderCore.numAvailableCus;
+  info_.maxComputeUnits_ = settings().enableWgpMode_
+      ? palProp.gfxipProperties.shaderCore.numAvailableCus / 2
+      : palProp.gfxipProperties.shaderCore.numAvailableCus;
 
   info_.numberOfShaderEngines = palProp.gfxipProperties.shaderCore.numShaderEngines;
 
@@ -427,7 +423,8 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
   if (GPU_ADD_HBCC_SIZE) {
     localRAM = heaps[Pal::GpuHeapLocal].heapSize + heaps[Pal::GpuHeapInvisible].heapSize;
   } else {
-    localRAM = heaps[Pal::GpuHeapLocal].physicalHeapSize + heaps[Pal::GpuHeapInvisible].physicalHeapSize;
+    localRAM =
+        heaps[Pal::GpuHeapLocal].physicalHeapSize + heaps[Pal::GpuHeapInvisible].physicalHeapSize;
   }
 
   info_.globalMemSize_ = (static_cast<cl_ulong>(std::min(GPU_MAX_HEAP_SIZE, 100u)) *
@@ -445,10 +442,10 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
   // Find the largest heap form FB memory
   if (GPU_ADD_HBCC_SIZE) {
     info_.maxMemAllocSize_ = std::max(cl_ulong(heaps[Pal::GpuHeapLocal].heapSize),
-      cl_ulong(heaps[Pal::GpuHeapInvisible].heapSize));
+                                      cl_ulong(heaps[Pal::GpuHeapInvisible].heapSize));
   } else {
     info_.maxMemAllocSize_ = std::max(cl_ulong(heaps[Pal::GpuHeapLocal].physicalHeapSize),
-      cl_ulong(heaps[Pal::GpuHeapInvisible].physicalHeapSize));
+                                      cl_ulong(heaps[Pal::GpuHeapInvisible].physicalHeapSize));
   }
 
 #if defined(ATI_OS_WIN)
@@ -561,7 +558,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
 
   ::strcpy(info_.vendor_, "Advanced Micro Devices, Inc.");
   ::snprintf(info_.driverVersion_, sizeof(info_.driverVersion_) - 1, AMD_BUILD_STRING " (PAL%s)",
-        settings().useLightning_ ? ",LC" : ",HSAIL");
+             settings().useLightning_ ? ",LC" : ",HSAIL");
 
   info_.profile_ = "FULL_PROFILE";
   if (settings().oclVersion_ >= OpenCL20) {
@@ -640,15 +637,16 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
     info_.cuPerShaderArray_ = palProp.gfxipProperties.shaderCore.numCusPerShaderArray;
     info_.simdWidth_ = hwInfo()->simdWidth_;
     info_.simdInstructionWidth_ = hwInfo()->simdInstructionWidth_;
-    info_.wavefrontWidth_ = settings().enableWave32Mode_ ? 32:
-                            palProp.gfxipProperties.shaderCore.nativeWavefrontSize;
+    info_.wavefrontWidth_ =
+        settings().enableWave32Mode_ ? 32 : palProp.gfxipProperties.shaderCore.nativeWavefrontSize;
     info_.availableSGPRs_ = palProp.gfxipProperties.shaderCore.numAvailableSgprs;
 
     info_.globalMemChannelBanks_ = 4;
     info_.globalMemChannelBankWidth_ = hwInfo()->memChannelBankWidth_;
     info_.localMemSizePerCU_ = hwInfo()->localMemSizePerCU_;
     info_.localMemBanks_ = hwInfo()->localMemBanks_;
-    info_.gfxipVersion_ = settings().useLightning_ ? hwInfo()->gfxipVersionLC_ : hwInfo()->gfxipVersion_;
+    info_.gfxipVersion_ =
+        settings().useLightning_ ? hwInfo()->gfxipVersionLC_ : hwInfo()->gfxipVersion_;
 
     info_.timeStampFrequency_ = 1000000;
     info_.numAsyncQueues_ = numComputeRings;
@@ -661,7 +659,7 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
     info_.pcieDeviceId_ = palProp.deviceId;
     info_.pcieRevisionId_ = palProp.revisionId;
     info_.maxThreadsPerCU_ = info_.wavefrontWidth_ * hwInfo()->simdPerCU_ *
-                             palProp.gfxipProperties.shaderCore.numWavefrontsPerSimd;
+        palProp.gfxipProperties.shaderCore.numWavefrontsPerSimd;
   }
 }
 
@@ -789,8 +787,7 @@ Device::Device()
       globalScratchBuf_(nullptr),
       srdManager_(nullptr),
       resourceList_(nullptr),
-      rgpCaptureMgr_(nullptr)
-      {}
+      rgpCaptureMgr_(nullptr) {}
 
 Device::~Device() {
   // remove the HW debug manager
@@ -803,8 +800,8 @@ Device::~Device() {
   }
 
   if (glb_ctx_ != nullptr) {
-      glb_ctx_->release();
-      glb_ctx_ = nullptr;
+    glb_ctx_->release();
+    glb_ctx_ = nullptr;
   }
 
   delete srdManager_;
@@ -878,19 +875,21 @@ bool Device::create(Pal::IDevice* device) {
   ipLevel_ = properties().gfxLevel;
   asicRevision_ = properties().revision;
 
-   // XNACK flag should be set for  PageMigration | IOMMUv2 Support
-  uint isXNACKSupported = static_cast<uint>(properties_.gpuMemoryProperties.flags.pageMigrationEnabled
-      || properties_.gpuMemoryProperties.flags.iommuv2Support);
+  // XNACK flag should be set when either PageMigration or IOMMUv2 is supported
+  uint isXNACKSupported =
+      static_cast<uint>(properties_.gpuMemoryProperties.flags.pageMigrationEnabled ||
+                        properties_.gpuMemoryProperties.flags.iommuv2Support);
   uint subtarget = isXNACKSupported;
 
   // Update HW info for the device
   if ((GPU_ENABLE_PAL == 1) && (properties().revision <= Pal::AsicRevision::Polaris12)) {
     hwInfo_ = &DeviceInfo[static_cast<uint>(properties().revision)];
   } else if (ipLevel_ >= Pal::GfxIpLevel::GfxIp9) {
-      // For compiler sub targets
-      subtarget = (static_cast<uint>(asicRevision_) % static_cast<uint>(Pal::AsicRevision::Vega10)) << 1 |
-          subtarget;
-      hwInfo_ = &Gfx9PlusSubDeviceInfo[subtarget];
+    // For compiler sub targets
+    subtarget = (static_cast<uint>(asicRevision_) % static_cast<uint>(Pal::AsicRevision::Vega10))
+            << 1 |
+        subtarget;
+    hwInfo_ = &Gfx9PlusSubDeviceInfo[subtarget];
   } else {
     return false;
   }
@@ -995,7 +994,7 @@ bool Device::create(Pal::IDevice* device) {
   }
 
   if (settings().useLightning_) {
-#if defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY)
+#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
     //  create compilation object with cache support
     int gfxipMajor = hwInfo()->gfxipVersionLC_ / 100;
     int gfxipMinor = hwInfo()->gfxipVersionLC_ / 10 % 10;
@@ -1013,7 +1012,7 @@ bool Device::create(Pal::IDevice* device) {
     }
 
     amd::CacheCompilation* compObj = new amd::CacheCompilation(
-      cacheTarget.str(), "_pal", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET);
+        cacheTarget.str(), "_pal", OCL_CODE_CACHE_ENABLE, OCL_CODE_CACHE_RESET);
     if (!compObj) {
       LogError("Unable to create cache compilation object!");
       return false;
@@ -1021,18 +1020,17 @@ bool Device::create(Pal::IDevice* device) {
 
     cacheCompilation_.reset(compObj);
 #endif
-  }
-  else {
-#if  defined(WITH_COMPILER_LIB)
+  } else {
+#if defined(WITH_COMPILER_LIB)
     const char* library = getenv("HSA_COMPILER_LIBRARY");
-    aclCompilerOptions opts = { sizeof(aclCompilerOptions_0_8),
-      library,
-      nullptr,
-      nullptr,
-      nullptr,
-      nullptr,
-      nullptr,
-      AMD_OCL_SC_LIB };
+    aclCompilerOptions opts = {sizeof(aclCompilerOptions_0_8),
+                               library,
+                               nullptr,
+                               nullptr,
+                               nullptr,
+                               nullptr,
+                               nullptr,
+                               AMD_OCL_SC_LIB};
     // Initialize the compiler handle
     acl_error error;
     compiler_ = aclCompilerInit(&opts, &error);
@@ -1056,7 +1054,7 @@ bool Device::create(Pal::IDevice* device) {
 
   if ((glb_ctx_ == nullptr) && (gNumDevices > 1) && (device == gDeviceList[gNumDevices - 1])) {
     std::vector<amd::Device*> devices;
-    uint32_t numDevices =  amd::Device::numDevices(CL_DEVICE_TYPE_GPU, true);
+    uint32_t numDevices = amd::Device::numDevices(CL_DEVICE_TYPE_GPU, true);
     // Add all PAL devices
     for (uint32_t i = gStartDevice; i < numDevices; ++i) {
       devices.push_back(amd::Device::devices()[i]);
@@ -1070,8 +1068,8 @@ bool Device::create(Pal::IDevice* device) {
       if (glb_ctx_ == nullptr) {
         return false;
       }
-      amd::Buffer* buf = 
-        new (GlbCtx()) amd::Buffer(GlbCtx(), CL_MEM_ALLOC_HOST_PTR, kP2PStagingSize);
+      amd::Buffer* buf =
+          new (GlbCtx()) amd::Buffer(GlbCtx(), CL_MEM_ALLOC_HOST_PTR, kP2PStagingSize);
       if ((buf != nullptr) && buf->create()) {
         p2p_stage_ = buf;
       } else {
@@ -1086,11 +1084,8 @@ bool Device::create(Pal::IDevice* device) {
 
 // =====================================================================================================================
 // Master function that handles developer callbacks from PAL.
-void PAL_STDCALL Device::PalDeveloperCallback(
-  void*                        pPrivateData,
-  const Pal::uint32            deviceIndex,
-  Pal::Developer::CallbackType type,
-  void*                        pCbData) {
+void PAL_STDCALL Device::PalDeveloperCallback(void* pPrivateData, const Pal::uint32 deviceIndex,
+                                              Pal::Developer::CallbackType type, void* pCbData) {
   Device* device = static_cast<Device*>(pPrivateData);
   const auto& barrier = *static_cast<const Pal::Developer::BarrierData*>(pCbData);
 
@@ -1099,7 +1094,7 @@ void PAL_STDCALL Device::PalDeveloperCallback(
   VirtualGPU* gpu = nullptr;
   if (pBarrierData->pCmdBuffer != nullptr) {
     // Find which queue the current command buffer belongs
-    for (const auto& it: device->vgpus()) {
+    for (const auto& it : device->vgpus()) {
       if (it->isActiveCmd(pBarrierData->pCmdBuffer)) {
         gpu = it;
         break;
@@ -1112,18 +1107,18 @@ void PAL_STDCALL Device::PalDeveloperCallback(
   }
 
   switch (type) {
-  case Pal::Developer::CallbackType::BarrierBegin:
-    device->rgpCaptureMgr()->WriteBarrierStartMarker(gpu, barrier);
-  break;
-  case Pal::Developer::CallbackType::BarrierEnd:
-    device->rgpCaptureMgr()->WriteBarrierEndMarker(gpu, barrier);
-  break;
-  case Pal::Developer::CallbackType::ImageBarrier:
-    assert(false);
-    break;
-  case Pal::Developer::CallbackType::DrawDispatch:
+    case Pal::Developer::CallbackType::BarrierBegin:
+      device->rgpCaptureMgr()->WriteBarrierStartMarker(gpu, barrier);
       break;
-  default:
+    case Pal::Developer::CallbackType::BarrierEnd:
+      device->rgpCaptureMgr()->WriteBarrierEndMarker(gpu, barrier);
+      break;
+    case Pal::Developer::CallbackType::ImageBarrier:
+      assert(false);
+      break;
+    case Pal::Developer::CallbackType::DrawDispatch:
+      break;
+    default:
       break;
   }
 }
@@ -1136,15 +1131,16 @@ bool Device::initializeHeapResources() {
     // Request all compute engines
     finalizeInfo.requestedEngineCounts[Pal::EngineTypeCompute].engines =
         ((1 << numComputeEngines_) - 1);
-    for (const auto& it: exclusiveComputeEnginesId_) {
+    for (const auto& it : exclusiveComputeEnginesId_) {
       // Request real time compute engines
-      finalizeInfo.requestedEngineCounts[Pal::EngineTypeExclusiveCompute].engines |= (1 << it.second);
+      finalizeInfo.requestedEngineCounts[Pal::EngineTypeExclusiveCompute].engines |=
+          (1 << it.second);
     }
     // Request all SDMA engines
     finalizeInfo.requestedEngineCounts[Pal::EngineTypeDma].engines = (1 << numDmaEngines_) - 1;
 
     if (iDev()->Finalize(finalizeInfo) != Pal::Result::Success) {
-        return false;
+      return false;
     }
 
     heapInitComplete_ = true;
@@ -1201,7 +1197,8 @@ device::VirtualDevice* Device::createVirtualDevice(amd::CommandQueue* queue) {
   if (queue != nullptr) {
     profiling = queue->properties().test(CL_QUEUE_PROFILING_ENABLE);
     if (queue->asHostQueue() != nullptr) {
-      bool interopQueue = (0 != (queue->context().info().flags_ &
+      bool interopQueue = (0 !=
+                           (queue->context().info().flags_ &
                             (amd::Context::GLDeviceKhr | amd::Context::D3D10DeviceKhr |
                              amd::Context::D3D11DeviceKhr)));
       rtCUs = queue->rtCUs();
@@ -1233,8 +1230,7 @@ device::Program* Device::createProgram(amd::option::Options* options) {
   device::Program* program;
   if (settings().useLightning_) {
     program = new LightningProgram(*this);
-  }
-  else {
+  } else {
     program = new HSAILProgram(*this);
   }
   if (program == nullptr) {
@@ -1249,9 +1245,7 @@ typedef std::unordered_map<int, bool> requestedDevices_t;
 
 //! Parses the requested list of devices to be exposed to the user.
 static void parseRequestedDeviceList(const char* requestedDeviceList,
-                                     requestedDevices_t& requestedDevices,
-                                     uint32_t numDevices) {
-
+                                     requestedDevices_t& requestedDevices, uint32_t numDevices) {
   char* pch = strtok(const_cast<char*>(requestedDeviceList), ",");
   while (pch != nullptr) {
     bool deviceIdValid = true;
@@ -1263,8 +1257,7 @@ static void parseRequestedDeviceList(const char* requestedDeviceList,
         break;
       }
     }
-    if (currentDeviceIndex < 0 ||
-      static_cast<uint32_t>(currentDeviceIndex) >= numDevices) {
+    if (currentDeviceIndex < 0 || static_cast<uint32_t>(currentDeviceIndex) >= numDevices) {
       deviceIdValid = false;
     }
     // Get next token.
@@ -1310,9 +1303,9 @@ bool Device::init() {
   // Count up all the devices in the system.
   platform_->EnumerateDevices(&gNumDevices, &gDeviceList[0]);
 
-  const char* requestedDeviceList = amd::IS_HIP ? ((HIP_VISIBLE_DEVICES[0] != '\0') ?
-                                    HIP_VISIBLE_DEVICES : CUDA_VISIBLE_DEVICES)
-                                    : GPU_DEVICE_ORDINAL;
+  const char* requestedDeviceList = amd::IS_HIP
+      ? ((HIP_VISIBLE_DEVICES[0] != '\0') ? HIP_VISIBLE_DEVICES : CUDA_VISIBLE_DEVICES)
+      : GPU_DEVICE_ORDINAL;
 
   if (requestedDeviceList[0] != '\0') {
     useDeviceList = true;
@@ -1465,8 +1458,8 @@ pal::Memory* Device::createBuffer(amd::Memory& owner, bool directAccess) const {
     if (result) {
       // Disallow permanent map for Win7 only, since OS will move buffer to sysmem
       if (IS_LINUX ||
-        // Or Win10
-        (properties().gpuMemoryProperties.flags.supportPerSubmitMemRefs == false)) {
+          // Or Win10
+          (properties().gpuMemoryProperties.flags.supportPerSubmitMemRefs == false)) {
         void* address = gpuMemory->map(nullptr);
         CondLog(address == nullptr, "PAL failed lock of persistent memory!");
       }
@@ -1697,9 +1690,9 @@ device::Memory* Device::createMemory(amd::Memory& owner) const {
       (memory->memoryType() != Resource::ExternalPhysical) &&
       ((owner.getHostMem() != nullptr) ||
        ((nullptr != owner.parent()) && (owner.getHostMem() != nullptr)))) {
-    bool ok = memory->pinSystemMemory(owner.getHostMem(), (owner.getHostMemRef()->size())
-                                          ? owner.getHostMemRef()->size()
-                                          : owner.getSize());
+    bool ok = memory->pinSystemMemory(
+        owner.getHostMem(),
+        (owner.getHostMemRef()->size()) ? owner.getHostMemRef()->size() : owner.getSize());
     //! \note: Ignore the pinning result for now
   }
 
@@ -1720,9 +1713,9 @@ bool Device::createSampler(const amd::Sampler& owner, device::Sampler** sampler)
 device::Memory* Device::createView(amd::Memory& owner, const device::Memory& parent) const {
   assert((owner.asImage() != nullptr) && "View supports images only");
   const amd::Image& image = *owner.asImage();
-  pal::Memory* gpuImage = new pal::Image(
-    *this, owner, image.getWidth(), image.getHeight(), image.getDepth(),
-    image.getImageFormat(), image.getType(), image.getMipLevels());
+  pal::Memory* gpuImage =
+      new pal::Image(*this, owner, image.getWidth(), image.getHeight(), image.getDepth(),
+                     image.getImageFormat(), image.getType(), image.getMipLevels());
 
   // Create resource
   if (nullptr != gpuImage) {
@@ -1827,19 +1820,18 @@ bool Device::globalFreeMemory(size_t* freeMemory) const {
   Pal::gpusize invisible = allocedMem[Pal::GpuHeapInvisible] - resourceCache().lclCacheSize();
 
   // Fill free memory info
-  freeMemory[TotalFreeMemory] = static_cast<size_t>((info().globalMemSize_ -
-    (local + invisible)) / Ki);
+  freeMemory[TotalFreeMemory] =
+      static_cast<size_t>((info().globalMemSize_ - (local + invisible)) / Ki);
   if (invisible >= heaps_[Pal::GpuHeapInvisible].heapSize) {
     invisible = 0;
-  }
-  else {
+  } else {
     invisible = heaps_[Pal::GpuHeapInvisible].heapSize - invisible;
   }
   freeMemory[LargestFreeBlock] = static_cast<size_t>(invisible) / Ki;
 
   if (settings().apuSystem_) {
     Pal::gpusize sysMem = allocedMem[Pal::GpuHeapGartCacheable] + allocedMem[Pal::GpuHeapGartUswc] -
-      resourceCache().cacheSize() + resourceCache().lclCacheSize();
+        resourceCache().cacheSize() + resourceCache().lclCacheSize();
     sysMem /= Ki;
     if (sysMem >= freeMemory[TotalFreeMemory]) {
       freeMemory[TotalFreeMemory] = 0;
@@ -1945,8 +1937,7 @@ bool Device::allocScratch(uint regNum, const VirtualGPU* vgpu) {
     amd::ScopedLock lk(scratchAlloc_);
     uint sb = vgpu->hwRing();
     static const uint WaveSizeLimit = ((1 << 21) - 256);
-    const uint threadSizeLimit =
-        WaveSizeLimit / info().wavefrontWidth_;
+    const uint threadSizeLimit = WaveSizeLimit / info().wavefrontWidth_;
     if (regNum > threadSizeLimit) {
       LogError("Requested private memory is bigger than HW supports!");
       regNum = threadSizeLimit;
@@ -1968,9 +1959,8 @@ bool Device::allocScratch(uint regNum, const VirtualGPU* vgpu) {
           // Calculate the size of the scratch buffer for a queue
           uint32_t numTotalCUs = info().maxComputeUnits_;
           uint32_t numMaxWaves = settings().numScratchWavesPerCu_ * numTotalCUs;
-          scratchBuf->size_ =
-              static_cast<uint64_t>(info().wavefrontWidth_) *
-              scratchBuf->regNum_ * numMaxWaves * sizeof(uint32_t);
+          scratchBuf->size_ = static_cast<uint64_t>(info().wavefrontWidth_) * scratchBuf->regNum_ *
+              numMaxWaves * sizeof(uint32_t);
           scratchBuf->size_ = std::min(scratchBuf->size_, info().maxMemAllocSize_);
           scratchBuf->size_ = std::min(scratchBuf->size_, uint64_t(3 * Gi));
           // Note: Generic address space setup in HW requires 64KB alignment for scratch
@@ -2280,7 +2270,7 @@ void Device::SrdManager::freeSrdSlot(uint64_t addr) {
 void Device::updateAllocedMemory(Pal::GpuHeap heap, Pal::gpusize size, bool free) const {
   if (free) {
     allocedMem[heap] -= size;
-  }  else {
+  } else {
     allocedMem[heap] += size;
   }
 }
@@ -2337,12 +2327,18 @@ cl_int Device::hwDebugManagerInit(amd::Context* context, uintptr_t messageStorag
   return status;
 }
 
-bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) {
+bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
+                          cl_set_device_clock_mode_output_amd* pSetClockModeOutput) {
   bool result = false;
   Pal::SetClockModeInput setClockMode = {};
-  Pal::DeviceClockMode palClockMode = static_cast<Pal::DeviceClockMode>(setClockModeInput.clock_mode);
+  Pal::DeviceClockMode palClockMode =
+      static_cast<Pal::DeviceClockMode>(setClockModeInput.clock_mode);
   setClockMode.clockMode = palClockMode;
-  result = (Pal::Result::Success == (iDev()->SetClockMode(setClockMode, reinterpret_cast<Pal::SetClockModeOutput*>(pSetClockModeOutput))))? true : false;
+  result = (Pal::Result::Success ==
+            (iDev()->SetClockMode(setClockMode,
+                                  reinterpret_cast<Pal::SetClockModeOutput*>(pSetClockModeOutput))))
+      ? true
+      : false;
   return result;
 }
 
diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.hpp b/projects/clr/rocclr/runtime/device/pal/paldevice.hpp
index 4528954dc2..5420c8202a 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldevice.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldevice.hpp
@@ -49,7 +49,7 @@ class NullDevice : public amd::Device {
   bool create(Pal::AsicRevision asicRevision,  //!< GPU ASIC revision
               Pal::GfxIpLevel ipLevel,         //!< GPU ip level
               uint xNACKSupported = 0          //!< GPU xNACKSupported
-             );
+  );
 
   //! Instantiate a new virtual device
   virtual device::VirtualDevice* createVirtualDevice(amd::CommandQueue* queue = NULL) {
@@ -111,11 +111,14 @@ class NullDevice : public amd::Device {
   virtual void svmFree(void* ptr) const { return; }
 
   void* Alloc(const Util::AllocInfo& allocInfo) { return allocator_.Alloc(allocInfo); }
-  void  Free(const Util::FreeInfo& freeInfo) { allocator_.Free(freeInfo); }
-  virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; }
+  void Free(const Util::FreeInfo& freeInfo) { allocator_.Free(freeInfo); }
+  virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
+                            cl_set_device_clock_mode_output_amd* pSetClockModeOutput) {
+    return true;
+  }
 
  protected:
-  static Util::GenericAllocator allocator_; //!< Generic memory allocator in PAL
+  static Util::GenericAllocator allocator_;  //!< Generic memory allocator in PAL
 
   Pal::AsicRevision asicRevision_;  //!< ASIC revision
   Pal::GfxIpLevel ipLevel_;         //!< Device IP level
@@ -127,7 +130,7 @@ class NullDevice : public amd::Device {
                       size_t maxTextureSize,         //!< Maximum texture size supported in HW
                       uint numComputeRings,          //!< Number of compute rings
                       uint numExclusiveComputeRings  //!< Number of exclusive compute rings
-                      );
+  );
 };
 
 //! Forward declarations
@@ -148,26 +151,22 @@ class ThreadTrace;
 #ifndef CL_FILTER_NONE
 #define CL_FILTER_NONE 0x1142
 #endif
-enum class ExclusiveQueueType : uint32_t {
-  RealTime0 = 0,
-  RealTime1,
-  Medium
-};
+enum class ExclusiveQueueType : uint32_t { RealTime0 = 0, RealTime1, Medium };
 class Sampler : public device::Sampler {
  public:
   //! Constructor
-    Sampler(const Device& dev) : dev_(dev) {}
+  Sampler(const Device& dev) : dev_(dev) {}
 
   //! Default destructor for the device memory object
   virtual ~Sampler();
 
   //! Creates a device sampler from the OCL sampler state
   bool create(uint32_t oclSamplerState  //!< OCL sampler state
-              );
+  );
 
   //! Creates a device sampler from the OCL sampler state
   bool create(const amd::Sampler& owner  //!< AMD sampler object
-              );
+  );
 
  private:
   //! Disable default copy constructor
@@ -216,7 +215,7 @@ class Device : public NullDevice {
     //! Releases transfer buffer
     void release(VirtualGPU& gpu,  //!< Virual GPU object used with the buffer
                  Memory& buffer    //!< Transfer buffer for release
-                 );
+    );
 
     //! Returns the buffer's size for transfer
     size_t bufSize() const { return bufSize_; }
@@ -308,7 +307,7 @@ class Device : public NullDevice {
   //! Initialise a device (i.e. all parts of the constructor that could
   //! potentially fail)
   bool create(Pal::IDevice* device  //!< PAL device interface object
-              );
+  );
 
   //! Destructor for the physical GPU device
   virtual ~Device();
@@ -346,7 +345,8 @@ class Device : public NullDevice {
   virtual bool validateKernel(const amd::Kernel& kernel,  //!< AMD kernel object
                               const device::VirtualDevice* vdev);
 
-  virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput);
+  virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
+                            cl_set_device_clock_mode_output_amd* pSetClockModeOutput);
 
   //! Retrieves information about free memory on a GPU device
   virtual bool globalFreeMemory(size_t* freeMemory) const;
@@ -398,9 +398,10 @@ class Device : public NullDevice {
   //! Returns the number of available compute rings
   uint numExclusiveComputeEngines() const { return exclusiveComputeEnginesId_.size(); }
 
-  //! Returns the map of available exclusive compute rings with the engine index 
-  const std::map<ExclusiveQueueType, uint32_t>& exclusiveComputeEnginesId() const
-    { return exclusiveComputeEnginesId_; }
+  //! Returns the map of available exclusive compute rings with the engine index
+  const std::map<ExclusiveQueueType, uint32_t>& exclusiveComputeEnginesId() const {
+    return exclusiveComputeEnginesId_;
+  }
 
   //! Returns the number of available DMA engines
   uint numDMAEngines() const { return numDmaEngines_; }
@@ -526,11 +527,8 @@ class Device : public NullDevice {
   }
 
  private:
-  static void PAL_STDCALL PalDeveloperCallback(
-    void*                        pPrivateData,
-    const Pal::uint32            deviceIndex,
-    Pal::Developer::CallbackType type,
-    void*                        pCbData);
+  static void PAL_STDCALL PalDeveloperCallback(void* pPrivateData, const Pal::uint32 deviceIndex,
+                                               Pal::Developer::CallbackType type, void* pCbData);
 
   //! Disable copy constructor
   Device(const Device&);
@@ -554,36 +552,37 @@ class Device : public NullDevice {
   //! Allocates/reallocates the scratch buffer, according to the usage
   bool allocScratch(uint regNum,            //!< Number of the scratch registers
                     const VirtualGPU* vgpu  //!< Virtual GPU for the allocation
-                    );
+  );
 
   //! Interop for D3D devices
   bool associateD3D11Device(void* d3d11Device  //!< void* is of type ID3D11Device*
-                            );
+  );
   bool associateD3D10Device(void* d3d10Device  //!< void* is of type ID3D10Device*
-                            );
+  );
   bool associateD3D9Device(void* d3d9Device  //!< void* is of type IDirect3DDevice9*
-                           );
+  );
   //! Interop for GL device
   bool glAssociate(void* GLplatformContext, void* GLdeviceContext) const;
   bool glDissociate(void* GLplatformContext, void* GLdeviceContext) const;
 
-  static char* platformObj_;          //!< Memory allocated for PAL platform object
-  static Pal::IPlatform*  platform_;  //!< Pointer to the PAL platform object
+  static char* platformObj_;         //!< Memory allocated for PAL platform object
+  static Pal::IPlatform* platform_;  //!< Pointer to the PAL platform object
 
-  amd::Context* context_;       //!< A dummy context for internal allocations
-  mutable amd::Monitor lockAsyncOps_;    //!< Lock to serialise all async ops on this device
+  amd::Context* context_;              //!< A dummy context for internal allocations
+  mutable amd::Monitor lockAsyncOps_;  //!< Lock to serialise all async ops on this device
   //! Lock to serialise all async ops on initialization heap operation
-  mutable amd::Monitor lockForInitHeap_;        
-  mutable amd::Monitor lockPAL_;         //!< Lock to serialise PAL access
-  mutable amd::Monitor vgpusAccess_;     //!< Lock to serialise virtual gpu list access
-  mutable amd::Monitor scratchAlloc_;    //!< Lock to serialise scratch allocation
-  mutable amd::Monitor mapCacheOps_;     //!< Lock to serialise cache for the map resources
-  mutable amd::Monitor lockResourceOps_; //!< Lock to serialise resource access
-  XferBuffers* xferRead_;                //!< Transfer buffers read
-  std::vector<amd::Memory*>* mapCache_;  //!< Map cache info structure
-  ResourceCache* resourceCache_;         //!< Resource cache
-  uint numComputeEngines_;               //!< The number of available compute engines
-  std::map<ExclusiveQueueType, uint32_t> exclusiveComputeEnginesId_;//!< The number of available compute engines
+  mutable amd::Monitor lockForInitHeap_;
+  mutable amd::Monitor lockPAL_;          //!< Lock to serialise PAL access
+  mutable amd::Monitor vgpusAccess_;      //!< Lock to serialise virtual gpu list access
+  mutable amd::Monitor scratchAlloc_;     //!< Lock to serialise scratch allocation
+  mutable amd::Monitor mapCacheOps_;      //!< Lock to serialise cache for the map resources
+  mutable amd::Monitor lockResourceOps_;  //!< Lock to serialise resource access
+  XferBuffers* xferRead_;                 //!< Transfer buffers read
+  std::vector<amd::Memory*>* mapCache_;   //!< Map cache info structure
+  ResourceCache* resourceCache_;          //!< Resource cache
+  uint numComputeEngines_;                //!< The number of available compute engines
+  std::map<ExclusiveQueueType, uint32_t>
+      exclusiveComputeEnginesId_;        //!< Map of exclusive compute rings to engine index
   uint numDmaEngines_;                   //!< The number of available compute engines
   bool heapInitComplete_;                //!< Keep track of initialization status of heap resources
   VirtualGPU* xferQueue_;                //!< Transfer queue
@@ -594,10 +593,13 @@ class Device : public NullDevice {
   mutable bool freeCPUMem_;              //!< flag to mark GPU free SVM CPU mem
   Pal::DeviceProperties properties_;     //!< PAL device properties
   Pal::IDevice* device_;                 //!< PAL device object
-  mutable std::atomic<Pal::gpusize> allocedMem[Pal::GpuHeap::GpuHeapCount];  //!< Free memory counter
-  std::unordered_set<Resource*>* resourceList_;   //!< Active resource list
-  RgpCaptureMgr*   rgpCaptureMgr_;       //!< RGP capture manager
-  Pal::GpuMemoryHeapProperties heaps_[Pal::GpuHeapCount]; //!< Information about heaps, returned from PAL
+  mutable std::atomic<Pal::gpusize>
+      allocedMem[Pal::GpuHeap::GpuHeapCount];    //!< Allocated memory counter, per heap
+  std::unordered_set<Resource*>* resourceList_;  //!< Active resource list
+  RgpCaptureMgr* rgpCaptureMgr_;                 //!< RGP capture manager
+  Pal::GpuMemoryHeapProperties
+      heaps_[Pal::GpuHeapCount];  //!< Information about heaps, returned from PAL
 };
 
-/*@}*/} // namespace pal
+/*@}*/
+}  // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/paldeviced3d10.cpp b/projects/clr/rocclr/runtime/device/pal/paldeviced3d10.cpp
index e7d31a9d86..202fca7ef6 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldeviced3d10.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldeviced3d10.cpp
@@ -3,19 +3,19 @@
 #if defined(ATI_OS_LINUX)
 namespace pal {
 bool Device::associateD3D10Device(void* d3d10Device) { return false; }
-}  // pal
+}  // namespace pal
 #else  // !ATI_OS_WIN
 
 #include <D3D10_1.h>
 
 /**************************************************************************************************************
-* Note: ideally the DXX extension interfaces should be mapped from the DXX perforce branch.
-* This means OCL client spec will need to change to include headers directly from the DXX perforce
-*tree.
-* However, OCL only cares about the DXX OpenCL extension interface class. The spec cannot change
-* without notification. So it is safe to use a local copy of the relevant DXX extension interface
-*classes.
-**************************************************************************************************************/
+ * Note: ideally the DXX extension interfaces should be mapped from the DXX perforce branch.
+ * This means OCL client spec will need to change to include headers directly from the DXX perforce
+ * tree.
+ * However, OCL only cares about the DXX OpenCL extension interface class. The spec cannot change
+ * without notification. So it is safe to use a local copy of the relevant DXX extension interface
+ * classes.
+ **************************************************************************************************************/
 #include "DxxOpenCLInteropExt.h"
 
 namespace pal {
@@ -127,6 +127,6 @@ bool Device::associateD3D10Device(void* d3d10Device) {
   return canInteroperate;
 }
 
-}  // pal
+}  // namespace pal
 
 #endif  // !ATI_OS_WIN
diff --git a/projects/clr/rocclr/runtime/device/pal/paldeviced3d11.cpp b/projects/clr/rocclr/runtime/device/pal/paldeviced3d11.cpp
index 025b8ed9a5..00d852d80e 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldeviced3d11.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldeviced3d11.cpp
@@ -3,19 +3,19 @@
 #if defined(ATI_OS_LINUX)
 namespace pal {
 bool Device::associateD3D11Device(void* d3d11Device) { return false; }
-}
+}  // namespace pal
 #else  // !ATI_OS_LINUX
 
 #include <D3D11.h>
 
 /**************************************************************************************************************
-* Note: ideally the DXX extension interfaces should be mapped from the DXX perforce branch.
-* This means OCL client spec will need to change to include headers directly from the DXX perforce
-*tree.
-* However, OCL only cares about the DXX OpenCL extension interface class. The spec cannot change
-* without notification. So it is safe to use a local copy of the relevant DXX extension interface
-*classes.
-**************************************************************************************************************/
+ * Note: ideally the DXX extension interfaces should be mapped from the DXX perforce branch.
+ * This means OCL client spec will need to change to include headers directly from the DXX perforce
+ * tree.
+ * However, OCL only cares about the DXX OpenCL extension interface class. The spec cannot change
+ * without notification. So it is safe to use a local copy of the relevant DXX extension interface
+ * classes.
+ **************************************************************************************************************/
 #include "DxxOpenCLInteropExt.h"
 
 namespace pal {
@@ -128,6 +128,6 @@ bool Device::associateD3D11Device(void* d3d11Device) {
   return canInteroperate;
 }
 
-}  // pal
+}  // namespace pal
 
 #endif  // !ATI_OS_LINUX
diff --git a/projects/clr/rocclr/runtime/device/pal/paldeviced3d9.cpp b/projects/clr/rocclr/runtime/device/pal/paldeviced3d9.cpp
index a589d2abcf..cf2ee5303c 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldeviced3d9.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldeviced3d9.cpp
@@ -3,20 +3,20 @@
 #if defined(ATI_OS_LINUX)
 namespace pal {
 bool Device::associateD3D9Device(void* d3dDevice) { return false; }
-}
+}  // namespace pal
 #else  // !ATI_OS_LINUX
 
 #include <d3d9.h>
 #include <dxgi.h>
 
 /**************************************************************************************************************
-* Note: ideally the DXX extension interfaces should be mapped from the DXX perforce branch.
-* This means OCL client spec will need to change to include headers directly from the DXX perforce
-*tree.
-* However, OCL only cares about the DXX OpenCL extension interface class. The spec cannot change
-* without notification. So it is safe to use a local copy of the relevant DXX extension interface
-*classes.
-**************************************************************************************************************/
+ * Note: ideally the DXX extension interfaces should be mapped from the DXX perforce branch.
+ * This means OCL client spec will need to change to include headers directly from the DXX perforce
+ * tree.
+ * However, OCL only cares about the DXX OpenCL extension interface class. The spec cannot change
+ * without notification. So it is safe to use a local copy of the relevant DXX extension interface
+ * classes.
+ **************************************************************************************************************/
 #include "DxxOpenCLInteropExt.h"
 
 namespace pal {
@@ -44,5 +44,5 @@ bool Device::associateD3D9Device(void* d3d9Device) {
   return canInteroperate;
 }
 
-}  // pal
+}  // namespace pal
 #endif  // !ATI_OS_WIN
diff --git a/projects/clr/rocclr/runtime/device/pal/paldevicegl.cpp b/projects/clr/rocclr/runtime/device/pal/paldevicegl.cpp
index ac209191ca..1d8e9df9e7 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldevicegl.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldevicegl.cpp
@@ -45,8 +45,8 @@ typedef struct _mesa_glinterop_device_info {
 #ifdef ATI_OS_LINUX
 typedef void* (*PFNGlxGetProcAddress)(const GLubyte* procName);
 static PFNGlxGetProcAddress pfnGlxGetProcAddress = nullptr;
-typedef int(APIENTRYP PFNMesaGLInteropGLXQueryDeviceInfo)(
-    Display* dpy, GLXContext context, mesa_glinterop_device_info* out);
+typedef int(APIENTRYP PFNMesaGLInteropGLXQueryDeviceInfo)(Display* dpy, GLXContext context,
+                                                          mesa_glinterop_device_info* out);
 static PFNMesaGLInteropGLXQueryDeviceInfo pfnMesaGLInteropGLXQueryDeviceInfo = nullptr;
 static PFNGLXBEGINCLINTEROPAMD glXBeginCLInteropAMD = nullptr;
 static PFNGLXENDCLINTEROPAMD glXEndCLInteropAMD = nullptr;
@@ -68,480 +68,579 @@ static PFNWGLGETCONTEXTGPUINFOAMD wglGetContextGPUInfoAMD = nullptr;
 namespace pal {
 
 //
-/// GSL Surface Formats as per defined in cmSurfFmtEnum enum in //depot/stg/ugl/drivers/ugl/src/include/cm_enum.h
+/// GSL Surface Formats as per defined in cmSurfFmtEnum enum in
+/// //depot/stg/ugl/drivers/ugl/src/include/cm_enum.h
 //
 typedef enum cmSurfFmtEnum {
-    CM_SURF_FMT_NOOVERRIDE = -1,
-    CM_SURF_FMT_LUMINANCE8,                         ///< Luminance,  8 bits per element packed as (@c LLLLLLLL)
-    CM_SURF_FMT_LUMINANCE16,                        ///< Luminance, 16 bits per element packed as (@c LLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_LUMINANCE16F,                       ///< Luminance, 16 bits per element packed as (@c LLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_LUMINANCE32F,                       ///< Luminance, 32 bits per element packed as (@c LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_INTENSITY8,                         ///< Intensity,  8 bits per element packed as (@c IIIIIIII)
-    CM_SURF_FMT_INTENSITY16,                        ///< Intensity, 16 bits per element packed as (@c IIIIIIIIIIIIIIII)
-    CM_SURF_FMT_INTENSITY16F,                       ///< Intensity, 16 bits per element packed as (@c IIIIIIIIIIIIIIII)
-    CM_SURF_FMT_INTENSITY32F,                       ///< Intensity, 32 bits per element packed as (@c IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII)
-    CM_SURF_FMT_ALPHA8,                             ///< Alpha,      8 bits per element packed as (@c AAAAAAAA)
-    CM_SURF_FMT_ALPHA16,                            ///< Alpha,     16 bits per element packed as (@c AAAAAAAAAAAAAAAA)
-    CM_SURF_FMT_ALPHA16F,                           ///< Alpha,     16 bits per element packed as (@c AAAAAAAAAAAAAAAA)
-    CM_SURF_FMT_ALPHA32F,                           ///< Alpha,     32 bits per element packed as (@c AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA)
-    CM_SURF_FMT_LUMINANCE8_ALPHA8,                  ///< Luminance Alpha, 16 bits per element packed as (@c AAAAAAAALLLLLLLL)
-    CM_SURF_FMT_LUMINANCE16_ALPHA16,                ///< Luminance Alpha, 32 bits per element packed as (@c AAAAAAAAAAAAAAAALLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_LUMINANCE16F_ALPHA16F,              ///< Luminance Alpha, 32 bits per element packed as (@c AAAAAAAAAAAAAAAALLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_LUMINANCE32F_ALPHA32F,              ///< Luminance Alpha, 64 bits per element packed as (@c AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_B2_G3_R3,                           ///< RGB,    8 bits per element packed as (@c RRRGGGBB)
-    CM_SURF_FMT_B5_G6_R5,                           ///< RGB,   16 bits per element packed as (@c RRRRRGGGGGGBBBBB)
-    CM_SURF_FMT_BGRX4,                              ///< RGB,   16 bits per element packed as (@c XXXXRRRRGGGGBBBB)
-    CM_SURF_FMT_BGR5_X1,                            ///< RGB,   16 bits per element packed as (@c XRRRRRGGGGGBBBBB)
-    CM_SURF_FMT_BGRX8,                              ///< RGB,   32 bits per element packed as (@c XXXXXXXXRRRRRRRRGGGGGGGGBBBBBBBB) - XXX unused by current driver
-    CM_SURF_FMT_BGR10_X2,                           ///< RGB,   32 bits per element packed as (@c XXRRRRRRRRRRGGGGGGGGGGBBBBBBBBBB)
-    CM_SURF_FMT_BGRX16,                             ///< RGB,   64 bits per element packed as (@c XXXXXXXXXXXXXXXXRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBB)
-    CM_SURF_FMT_BGRX16F,                            ///< RGB,   64 bits per element packed as (@c XXXXXXXXXXXXXXXXRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBB)
-    CM_SURF_FMT_BGRX32F,                            ///< RGB,  128 bits per element packed as (@c XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB)
-    CM_SURF_FMT_RGBX4,                              ///< RGB,   16 bits per element packed as (@c XXXXBBBBGGGGRRRR)
-    CM_SURF_FMT_RGB5_X1,                            ///< RGB,   16 bits per element packed as (@c XBBBBBGGGGGRRRRR)
-    CM_SURF_FMT_RGBX8,                              ///< RGB,   32 bits per element packed as (@c XXXXXXXXBBBBBBBBGGGGGGGGRRRRRRRR)
-    CM_SURF_FMT_RGB10_X2,                           ///< RGB,   32 bits per element packed as (@c XXBBBBBBBBBBGGGGGGGGGGRRRRRRRRRR)
-    CM_SURF_FMT_RGBX16,                             ///< RGB,   64 bits per element packed as (@c XXXXXXXXXXXXXXXXBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_RGBX16F,                            ///< RGB,   64 bits per element packed as (@c XXXXXXXXXXXXXXXXBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_RGBX32F,                            ///< RGB,  128 bits per element packed as (@c XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_BGRA4,                              ///< RGBA,  16 bits per element packed as (@c AAAARRRRGGGGBBBB)
-    CM_SURF_FMT_BGR5_A1,                            ///< RGBA,  16 bits per element packed as (@c ARRRRRGGGGGBBBBB)
-    CM_SURF_FMT_BGRA8,                              ///< RGBA,  32 bits per element packed as (@c AAAAAAAARRRRRRRRGGGGGGGGBBBBBBBB)
-    CM_SURF_FMT_BGR10_A2,                           ///< RGBA,  32 bits per element packed as (@c AARRRRRRRRRRGGGGGGGGGGBBBBBBBBBB)
-    CM_SURF_FMT_BGRA16,                             ///< RGBA,  64 bits per element packed as (@c AAAAAAAAAAAAAAAARRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBB)
-    CM_SURF_FMT_BGRA16F,                            ///< RGBA,  64 bits per element packed as (@c AAAAAAAAAAAAAAAARRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBB)
-    CM_SURF_FMT_BGRA32F,                            ///< RGBA, 128 bits per element packed as (@c AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAARRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB)
-    CM_SURF_FMT_RGBA4,                              ///< RGBA,  16 bits per element packed as (@c AAAABBBBGGGGRRRR)
-    CM_SURF_FMT_RGB5_A1,                            ///< RGBA,  16 bits per element packed as (@c ABBBBBGGGGGRRRRR)
-    CM_SURF_FMT_RGBA8,                              ///< RGBA,  32 bits per element packed as (@c AAAAAAAABBBBBBBBGGGGGGGGRRRRRRRR)
-    CM_SURF_FMT_RGB10_A2,                           ///< RGBA,  32 bits per element packed as (@c AABBBBBBBBBBGGGGGGGGGGRRRRRRRRRR)
-    CM_SURF_FMT_RGBA16,                             ///< RGBA,  64 bits per element packed as (@c AAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_RGBA16F,                            ///< RGBA,  64 bits per element packed as (@c AAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_RGBA32I,                            ///< RGBA, 128 bits per element packed as (@c AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_RGBA32F,                            ///< RGBA, 128 bits per element packed as (@c AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_DUDV8,                              ///< DUDV   16 bits per element packed as (@c VVVVVVVVUUUUUUUU)
-    CM_SURF_FMT_DXT1,                               ///< compressed, DXT1
-    CM_SURF_FMT_DXT2_3,                             ///< compressed, DXT2_3
-    CM_SURF_FMT_DXT4_5,                             ///< compressed, DXT4_5
-    CM_SURF_FMT_ATI1N,                              ///< compressed, 1 component
-    CM_SURF_FMT_ATI2N,                              ///< compressed, 2 component
-    CM_SURF_FMT_DEPTH16,                            ///< depth, 16 bits per element packed as (@c DDDDDDDDDDDDDDDD)
-    CM_SURF_FMT_DEPTH16F,                           ///< depth, 16 bits per element packed as (@c DDDDDDDDDDDDDDDD)
-    CM_SURF_FMT_DEPTH24_X8,                         ///< depth, 32 bits per element packed as (@c XXXXXXXXDDDDDDDDDDDDDDDDDDDDDDDD)
-    CM_SURF_FMT_DEPTH24F_X8,                        ///< depth, 32 bits per element packed as (@c SSSSSSSSDDDDDDDDDDDDDDDDDDDDDDDD)
-    CM_SURF_FMT_DEPTH24_STEN8,                      ///< depth + stencil, 32 bits per element packed as (@c SSSSSSSSDDDDDDDDDDDDDDDDDDDDDDDD)
-    CM_SURF_FMT_DEPTH24F_STEN8,                     ///< depth + stencil, 32 bits per element packed as (@c SSSSSSSSDDDDDDDDDDDDDDDDDDDDDDDD)
-    CM_SURF_FMT_DEPTH32F_X24_STEN8,                 ///< depth + stencil, 64 bits per element packed as (@c XXXXXXXXXXXXXXXXXXXXXXXXSSSSSSSSDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD)
-    CM_SURF_FMT_DEPTH32F,                           ///< depth, 32 bits per element packed as (@c DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD)
-    CM_SURF_FMT_sR11_sG11_sB10,                     ///< RGB,   32 bits per element packed as (@c RRRRRRRRRRRGGGGGGGGGGGBBBBBBBBBB)
-    CM_SURF_FMT_sU16,                               ///<
-    CM_SURF_FMT_sUV16,                              ///<
-    CM_SURF_FMT_sUVWQ16,                            ///<
-    CM_SURF_FMT_RG16,                               ///< RG,    32 bits per element packed as (@c RRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGG)
-    CM_SURF_FMT_RG16F,                              ///< RG,    32 bits per element packed as (@c RRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGG)
-    CM_SURF_FMT_RG32F,                              ///< RG,    64 bits per element packed as (@c RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG)
-    CM_SURF_FMT_ABGR4,                              ///< RGBA,  16 bits per element packed as (@c RRRRGGGGBBBBAAAA)
-    CM_SURF_FMT_A1_BGR5,                            ///< RGBA,  16 bits per element packed as (@c RRRRRGGGGGBBBBBA)
-    CM_SURF_FMT_ABGR8,                              ///< RGBA,  32 bits per element packed as (@c RRRRRRRRGGGGGGGGBBBBBBBBAAAAAAAA)
-    CM_SURF_FMT_A2_BGR10,                           ///< RGBA,  32 bits per element packed as (@c RRRRRRRRRRGGGGGGGGGGBBBBBBBBBBAA)
-    CM_SURF_FMT_ABGR16,                             ///< RGBA,  64 bits per element packed as (@c RRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBAAAAAAAAAAAAAAAA)
-    CM_SURF_FMT_ABGR16F,                            ///< RGBA,  64 bits per element packed as (@c RRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBAAAAAAAAAAAAAAAA)
-    CM_SURF_FMT_ABGR32F,                            ///< RGBA, 128 bits per element packed as (@c RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA)
-    CM_SURF_FMT_DXT1A,
-    CM_SURF_FMT_sRGB10_A2,                          ///< RGBA,  32  bits per element packed as signed (@c AABBBBBBBBBBGGGGGGGGGGRRRRRRRRRR)
-    CM_SURF_FMT_sR8,                                ///< R,     8   bits per element packed as signed (@c RRRRRRRR)
-    CM_SURF_FMT_sRG8,                               ///< RG,    16  bits per element packed as signed (@c RRRRRRRRGGGGGGGG)
-    CM_SURF_FMT_sR32I,                              ///< R,     32  bits per element packed as signed (@c RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_sRG32I,                             ///< RG,    64  bits per element packed as signed (@c RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG)
-    CM_SURF_FMT_sRGBA32I,                           ///< RGBA,  128 bits per element packed as signed (@c RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA)
-    CM_SURF_FMT_R32I,                               ///< R,     32  bits per element packed as (@c RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_RG32I,                              ///< RG,    64  bits per element packed as (@c RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG)
-    CM_SURF_FMT_RG8,                                ///< RG8,   16 bits per element packed as (@c RRRRRRRRGGGGGGGG)
-    CM_SURF_FMT_sRGBA8,                             ///< RGBA8, 32 bits per element packed as signed (@c RRRRRRRRGGGGGGGGBBBBBBBBAAAAAAAA)
-    CM_SURF_FMT_R11F_G11F_B10F,                     ///< RGB,   32 bits per element packed as (@c BBBBBBBBBBGGGGGGGGGGGRRRRRRRRRRR)
-    CM_SURF_FMT_RGB9_E5,                            ///< RGB,   32 bits per element packed as (@c EEEEEBBBBBBBBBGGGGGGGGGRRRRRRRRR)
-    CM_SURF_FMT_LUMINANCE_LATC1,                    ///< compressed LATC1
-    CM_SURF_FMT_SIGNED_LUMINANCE_LATC1,             ///< compressed signed LATC1
-    CM_SURF_FMT_LUMINANCE_ALPHA_LATC2,              ///< compressed LATC2
-    CM_SURF_FMT_SIGNED_LUMINANCE_ALPHA_LATC2,       ///< compressed signed LATC2
-    CM_SURF_FMT_RED_RGTC1,                          ///< compressed RGTC1
-    CM_SURF_FMT_SIGNED_RED_RGTC1,                   ///< compressed signed RGTC1
-    CM_SURF_FMT_RED_GREEN_RGTC2,                    ///< compressed RGTC2
-    CM_SURF_FMT_SIGNED_RED_GREEN_RGTC2,             ///< compressed signed RGTC2
-    CM_SURF_FMT_R8,                                 ///< R,     8   bits per element packed (@c RRRRRRRR)
-    CM_SURF_FMT_R16,                                ///< R,    16   bits per element packed (@c RRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_R16F,                               ///< R,    16   bits per element packed (@c RRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_R32F,                               ///< R,    32   bits per element packed (@c RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_R8I,                                ///< R,     8   bits per element packed (@c RRRRRRRR)
-    CM_SURF_FMT_sR8I,                               ///< R,     8   bits per element packed as signed (@c RRRRRRRR)
-    CM_SURF_FMT_RG8I,                               ///< RG,   16   bits per element packed (@c RRRRRRRRGGGGGGGG)
-    CM_SURF_FMT_sRG8I,                              ///< RG,   16   bits per element packed as signed (@c RRRRRRRRGGGGGGGG)
-    CM_SURF_FMT_R16I,                               ///< R,    16   bits per element packed (@c RRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_sR16I,                              ///< R,    16   bits per element packed as signed (@c RRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_RG16I,                              ///< RG,   32   bits per element packed (@c RRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGG)
-    CM_SURF_FMT_sRG16I,                             ///< RG,   32   bits per element packed as signed (@c RRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGG)
-    CM_SURF_FMT_RGBA32UI,                           ///< RGBA, 128 bits per element packed as (@c AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAARRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB)
-    CM_SURF_FMT_RGBX32UI,                           ///< RGBX,  128 bits per element packed as(@c XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB)
-    CM_SURF_FMT_ALPHA32UI,                          ///< Alpha, 32 bits per element packed as (@c AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA)
-    CM_SURF_FMT_INTENSITY32UI,                      ///< Intensity, 32 bits per element packed as (@c IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII)
-    CM_SURF_FMT_LUMINANCE32UI,                      ///< Luminance, 32 bits per element packed as (@c LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_LUMINANCE_ALPHA32UI,                ///< Luminance Alpha, 64 bits per element packed as (@c AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_RGBA16UI,                           ///< RGBA,  64 bits per element packed as (@c AAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_RGBX16UI,                           ///< RGB,   64 bits per element packed as (@c XXXXXXXXXXXXXXXXBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_ALPHA16UI,                          ///< Alpha, 16 bits per element packed as (@c AAAAAAAAAAAAAAAA)
-    CM_SURF_FMT_INTENSITY16UI,                      ///< Intensity, 16 bits per element packed as (@c IIIIIIIIIIIIIIII)
-    CM_SURF_FMT_LUMINANCE16UI,                      ///< Luminance, 16 bits per element packed as (@c LLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_LUMINANCE_ALPHA16UI,                ///< Luminance Alpha, 32 bits per element packed as (@c AAAAAAAAAAAAAAAALLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_RGBA8UI,                            ///< RGBA,  32 bits per element packed as (@c AAAAAAAABBBBBBBBGGGGGGGGRRRRRRRR)
-    CM_SURF_FMT_RGBX8UI,                            ///< RGB,   32 bits per element packed as (@c XXXXXXXXBBBBBBBBGGGGGGGGRRRRRRRR)
-    CM_SURF_FMT_ALPHA8UI,                           ///< Alpha, 8 bits per element packed as (@c AAAAAAAA)
-    CM_SURF_FMT_INTENSITY8UI,                       ///< Intensity, 8 bits per element packed as (@c IIIIIIII)
-    CM_SURF_FMT_LUMINANCE8UI,                       ///< Luminance, 8 bits per element packed as (@c LLLLLLLL)
-    CM_SURF_FMT_LUMINANCE_ALPHA8UI,                 ///< Luminance Alpha, 32 bits per element packed as (@c AAAAAAAALLLLLLLL)
-    CM_SURF_FMT_sRGBX32I,                            ///< RGBX,  128 bits per element packed as(@c XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB)
-    CM_SURF_FMT_sALPHA32I,                           ///< Alpha, 32 bits per element packed as (@c AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA)
-    CM_SURF_FMT_sINTENSITY32I,                       ///< Intensity, 32 bits per element packed as (@c IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII)
-    CM_SURF_FMT_sLUMINANCE32I,                       ///< Luminance, 32 bits per element packed as (@c LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_sLUMINANCE_ALPHA32I,                 ///< Luminance Alpha, 64 bits per element packed as (@c AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_sRGBA16I,                            ///< RGBA,  64 bits per element packed as (@c AAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_sRGBX16I,                            ///< RGB,   64 bits per element packed as (@c XXXXXXXXXXXXXXXXBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_sALPHA16I,                           ///< Alpha, 16 bits per element packed as (@c AAAAAAAAAAAAAAAA)
-    CM_SURF_FMT_sINTENSITY16I,                       ///< Intensity, 16 bits per element packed as (@c IIIIIIIIIIIIIIII)
-    CM_SURF_FMT_sLUMINANCE16I,                       ///< Luminance, 16 bits per element packed as (@c LLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_sLUMINANCE_ALPHA16I,                 ///< Luminance Alpha, 32 bits per element packed as (@c AAAAAAAAAAAAAAAALLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_sRGBA8I,                             ///< RGBA,  32 bits per element packed as (@c AAAAAAAABBBBBBBBGGGGGGGGRRRRRRRR)
-    CM_SURF_FMT_sRGBX8I,                             ///< RGB,   32 bits per element packed as (@c XXXXXXXXBBBBBBBBGGGGGGGGRRRRRRRR)
-    CM_SURF_FMT_sALPHA8I,                            ///< Alpha, 8 bits per element packed as (@c AAAAAAAA)
-    CM_SURF_FMT_sINTENSITY8I,                        ///< Intensity, 8 bits per element packed as (@c IIIIIIII)
-    CM_SURF_FMT_sLUMINANCE8I,                        ///< Luminance, 8 bits per element packed as (@c LLLLLLLL)
-    CM_SURF_FMT_sLUMINANCE_ALPHA8I,                  ///< Alpha, 8 bits per element packed as (@c AAAAAAAA)
-    CM_SURF_FMT_sDXT6,                                ///< compressed, CM_SURF_FMT_sDXT6
-    CM_SURF_FMT_DXT6,                                 ///< compressed, CM_SURF_FMT_DXT6
-    CM_SURF_FMT_DXT7,                                 ///< compressed, DXT7
-    CM_SURF_FMT_LUMINANCE8_SNORM,                   ///< Luminance,  8 bits per element packed as signed (@c LLLLLLLL)
-    CM_SURF_FMT_LUMINANCE16_SNORM,                  ///< Luminance, 16 bits per element packed as signed (@c LLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_INTENSITY8_SNORM,                   ///< Intensity,  8 bits per element packed as signed (@c IIIIIIII)
-    CM_SURF_FMT_INTENSITY16_SNORM,                  ///< Intensity, 16 bits per element packed as signed (@c IIIIIIIIIIIIIIII)
-    CM_SURF_FMT_ALPHA8_SNORM,                       ///< Alpha,      8 bits per element packed as signed (@c AAAAAAAA)
-    CM_SURF_FMT_ALPHA16_SNORM,                      ///< Alpha,     16 bits per element packed as signed (@c AAAAAAAAAAAAAAAA)
-    CM_SURF_FMT_LUMINANCE_ALPHA8_SNORM,             ///< Luminance Alpha, 16 bits per element packed as signed (@c AAAAAAAALLLLLLLL)
-    CM_SURF_FMT_LUMINANCE_ALPHA16_SNORM,            ///< Luminance Alpha, 32 bits per element packed as signed (@c AAAAAAAAAAAAAAAALLLLLLLLLLLLLLLL)
-    CM_SURF_FMT_R8_SNORM,                           ///< R,     8   bits per element packed as signed (@c RRRRRRRR)
-    CM_SURF_FMT_R16_SNORM,                          ///< R,    16   bits per element packed as signed (@c RRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_RG8_SNORM,                          ///< RG8,   16 bits per element packed as signed (@c RRRRRRRRGGGGGGGG)
-    CM_SURF_FMT_RG16_SNORM,                         ///< RG,    32 bits per element packed as signed (@c RRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGG)
-    CM_SURF_FMT_RGBX8_SNORM,                        ///< RGB,   32 bits per element packed as signed (@c XXXXXXXXBBBBBBBBGGGGGGGGRRRRRRRR)
-    CM_SURF_FMT_RGBX16_SNORM,                       ///< RGB,   64 bits per element packed as signed (@c XXXXXXXXXXXXXXXXBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_RGBA8_SNORM,                        ///< RGBA,  32 bits per element packed as signed (@c AAAAAAAABBBBBBBBGGGGGGGGRRRRRRRR)
-    CM_SURF_FMT_RGBA16_SNORM,                       ///< RGBA,  64 bits per element packed as signed (@c AAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_RGB10_A2UI,                         ///< RGBA,  32 bits per element packed as (@c AABBBBBBBBBBGGGGGGGGGGRRRRRRRRRR)
-    CM_SURF_FMT_RGB32F,                             ///< RGB, float, 96 bits per element packed as (@c BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_RGB32I,                             ///< RGB, unnormalized int, 96 bits per element packed as (@c BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_RGB32UI,                            ///< RGB, unnormalized uint, 96 bits per element packed as (@c BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
-    CM_SURF_FMT_RGBX8_SRGB,                         ///< RGB,   32 bits per element packed as (@c XXXXXXXXBBBBBBBBGGGGGGGGRRRRRRRR)
-    CM_SURF_FMT_RGBA8_SRGB,                         ///< RGBA,  32 bits per element packed as (@c AAAAAAAABBBBBBBBGGGGGGGGRRRRRRRR)
-    CM_SURF_FMT_DXT1_SRGB,                          ///< compressed, DXT1
-    CM_SURF_FMT_DXT1A_SRGB,                         ///<
-    CM_SURF_FMT_DXT2_3_SRGB,                        ///< compressed, DXT2_3
-    CM_SURF_FMT_DXT4_5_SRGB,                        ///< compressed, DXT4_5
-    CM_SURF_FMT_DXT7_SRGB,                          ///< compressed, DXT7
-    CM_SURF_FMT_RGB8_ETC2,                          ///< ETC2 compressed, RGB8 in 64 bits
-    CM_SURF_FMT_SRGB8_ETC2,                         ///< ETC2 compressed, SRGB8 in 64 bits
-    CM_SURF_FMT_RGB8_PT_ALPHA1_ETC2,                ///< ETC2 compressed, RGB8 in 64 bits
-    CM_SURF_FMT_SRGB8_PT_ALPHA1_ETC2,               ///< ETC2 compressed, sRGB8A1 in 64 bits
-    CM_SURF_FMT_RGBA8_ETC2_EAC,                     ///< ETC2 compressed, RGBA8 in 128 bits
-    CM_SURF_FMT_SRGB8_ALPHA8_ETC2_EAC,              ///< ETC2 compressed, sRGBA8 in 128 bits
-    CM_SURF_FMT_R11_EAC,                            ///< EAC compressed, R11 in 64 bits
-    CM_SURF_FMT_SIGNED_R11_EAC,                     ///< EAC compressed, signed R11 in 64 bits
-    CM_SURF_FMT_RG11_EAC,                           ///< EAC compressed, RG11 in 128 bits
-    CM_SURF_FMT_SIGNED_RG11_EAC,                    ///< EAC compressed, signed RG11 in 128 bits
+  CM_SURF_FMT_NOOVERRIDE = -1,
+  CM_SURF_FMT_LUMINANCE8,    ///< Luminance,  8 bits per element packed as (@c LLLLLLLL)
+  CM_SURF_FMT_LUMINANCE16,   ///< Luminance, 16 bits per element packed as (@c LLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_LUMINANCE16F,  ///< Luminance, 16 bits per element packed as (@c LLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_LUMINANCE32F,  ///< Luminance, 32 bits per element packed as (@c
+                             ///< LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_INTENSITY8,    ///< Intensity,  8 bits per element packed as (@c IIIIIIII)
+  CM_SURF_FMT_INTENSITY16,   ///< Intensity, 16 bits per element packed as (@c IIIIIIIIIIIIIIII)
+  CM_SURF_FMT_INTENSITY16F,  ///< Intensity, 16 bits per element packed as (@c IIIIIIIIIIIIIIII)
+  CM_SURF_FMT_INTENSITY32F,  ///< Intensity, 32 bits per element packed as (@c
+                             ///< IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII)
+  CM_SURF_FMT_ALPHA8,        ///< Alpha,      8 bits per element packed as (@c AAAAAAAA)
+  CM_SURF_FMT_ALPHA16,       ///< Alpha,     16 bits per element packed as (@c AAAAAAAAAAAAAAAA)
+  CM_SURF_FMT_ALPHA16F,      ///< Alpha,     16 bits per element packed as (@c AAAAAAAAAAAAAAAA)
+  CM_SURF_FMT_ALPHA32F,      ///< Alpha,     32 bits per element packed as (@c
+                             ///< AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA)
+  CM_SURF_FMT_LUMINANCE8_ALPHA8,      ///< Luminance Alpha, 16 bits per element packed as (@c
+                                      ///< AAAAAAAALLLLLLLL)
+  CM_SURF_FMT_LUMINANCE16_ALPHA16,    ///< Luminance Alpha, 32 bits per element packed as (@c
+                                      ///< AAAAAAAAAAAAAAAALLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_LUMINANCE16F_ALPHA16F,  ///< Luminance Alpha, 32 bits per element packed as (@c
+                                      ///< AAAAAAAAAAAAAAAALLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_LUMINANCE32F_ALPHA32F,  ///< Luminance Alpha, 64 bits per element packed as (@c
+                                      ///< AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_B2_G3_R3,  ///< RGB,    8 bits per element packed as (@c RRRGGGBB)
+  CM_SURF_FMT_B5_G6_R5,  ///< RGB,   16 bits per element packed as (@c RRRRRGGGGGGBBBBB)
+  CM_SURF_FMT_BGRX4,     ///< RGB,   16 bits per element packed as (@c XXXXRRRRGGGGBBBB)
+  CM_SURF_FMT_BGR5_X1,   ///< RGB,   16 bits per element packed as (@c XRRRRRGGGGGBBBBB)
+  CM_SURF_FMT_BGRX8,     ///< RGB,   32 bits per element packed as (@c
+                         ///< XXXXXXXXRRRRRRRRGGGGGGGGBBBBBBBB) - XXX unused by current driver
+  CM_SURF_FMT_BGR10_X2,  ///< RGB,   32 bits per element packed as (@c
+                         ///< XXRRRRRRRRRRGGGGGGGGGGBBBBBBBBBB)
+  CM_SURF_FMT_BGRX16,    ///< RGB,   64 bits per element packed as (@c
+                         ///< XXXXXXXXXXXXXXXXRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBB)
+  CM_SURF_FMT_BGRX16F,   ///< RGB,   64 bits per element packed as (@c
+                         ///< XXXXXXXXXXXXXXXXRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBB)
+  CM_SURF_FMT_BGRX32F,   ///< RGB,  128 bits per element packed as (@c
+                        ///< XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB)
+  CM_SURF_FMT_RGBX4,     ///< RGB,   16 bits per element packed as (@c XXXXBBBBGGGGRRRR)
+  CM_SURF_FMT_RGB5_X1,   ///< RGB,   16 bits per element packed as (@c XBBBBBGGGGGRRRRR)
+  CM_SURF_FMT_RGBX8,     ///< RGB,   32 bits per element packed as (@c
+                         ///< XXXXXXXXBBBBBBBBGGGGGGGGRRRRRRRR)
+  CM_SURF_FMT_RGB10_X2,  ///< RGB,   32 bits per element packed as (@c
+                         ///< XXBBBBBBBBBBGGGGGGGGGGRRRRRRRRRR)
+  CM_SURF_FMT_RGBX16,    ///< RGB,   64 bits per element packed as (@c
+                         ///< XXXXXXXXXXXXXXXXBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_RGBX16F,   ///< RGB,   64 bits per element packed as (@c
+                         ///< XXXXXXXXXXXXXXXXBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_RGBX32F,   ///< RGB,  128 bits per element packed as (@c
+                        ///< XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_BGRA4,     ///< RGBA,  16 bits per element packed as (@c AAAARRRRGGGGBBBB)
+  CM_SURF_FMT_BGR5_A1,   ///< RGBA,  16 bits per element packed as (@c ARRRRRGGGGGBBBBB)
+  CM_SURF_FMT_BGRA8,     ///< RGBA,  32 bits per element packed as (@c
+                         ///< AAAAAAAARRRRRRRRGGGGGGGGBBBBBBBB)
+  CM_SURF_FMT_BGR10_A2,  ///< RGBA,  32 bits per element packed as (@c
+                         ///< AARRRRRRRRRRGGGGGGGGGGBBBBBBBBBB)
+  CM_SURF_FMT_BGRA16,    ///< RGBA,  64 bits per element packed as (@c
+                         ///< AAAAAAAAAAAAAAAARRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBB)
+  CM_SURF_FMT_BGRA16F,   ///< RGBA,  64 bits per element packed as (@c
+                         ///< AAAAAAAAAAAAAAAARRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBB)
+  CM_SURF_FMT_BGRA32F,   ///< RGBA, 128 bits per element packed as (@c
+                        ///< AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAARRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB)
+  CM_SURF_FMT_RGBA4,     ///< RGBA,  16 bits per element packed as (@c AAAABBBBGGGGRRRR)
+  CM_SURF_FMT_RGB5_A1,   ///< RGBA,  16 bits per element packed as (@c ABBBBBGGGGGRRRRR)
+  CM_SURF_FMT_RGBA8,     ///< RGBA,  32 bits per element packed as (@c
+                         ///< AAAAAAAABBBBBBBBGGGGGGGGRRRRRRRR)
+  CM_SURF_FMT_RGB10_A2,  ///< RGBA,  32 bits per element packed as (@c
+                         ///< AABBBBBBBBBBGGGGGGGGGGRRRRRRRRRR)
+  CM_SURF_FMT_RGBA16,    ///< RGBA,  64 bits per element packed as (@c
+                         ///< AAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_RGBA16F,   ///< RGBA,  64 bits per element packed as (@c
+                         ///< AAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_RGBA32I,   ///< RGBA, 128 bits per element packed as (@c
+                        ///< AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_RGBA32F,  ///< RGBA, 128 bits per element packed as (@c
+                        ///< AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_DUDV8,    ///< DUDV   16 bits per element packed as (@c VVVVVVVVUUUUUUUU)
+  CM_SURF_FMT_DXT1,     ///< compressed, DXT1
+  CM_SURF_FMT_DXT2_3,   ///< compressed, DXT2_3
+  CM_SURF_FMT_DXT4_5,   ///< compressed, DXT4_5
+  CM_SURF_FMT_ATI1N,    ///< compressed, 1 component
+  CM_SURF_FMT_ATI2N,    ///< compressed, 2 component
+  CM_SURF_FMT_DEPTH16,  ///< depth, 16 bits per element packed as (@c DDDDDDDDDDDDDDDD)
+  CM_SURF_FMT_DEPTH16F,            ///< depth, 16 bits per element packed as (@c DDDDDDDDDDDDDDDD)
+  CM_SURF_FMT_DEPTH24_X8,          ///< depth, 32 bits per element packed as (@c
+                                   ///< XXXXXXXXDDDDDDDDDDDDDDDDDDDDDDDD)
+  CM_SURF_FMT_DEPTH24F_X8,         ///< depth, 32 bits per element packed as (@c
+                                   ///< XXXXXXXXDDDDDDDDDDDDDDDDDDDDDDDD)
+  CM_SURF_FMT_DEPTH24_STEN8,       ///< depth + stencil, 32 bits per element packed as (@c
+                                   ///< SSSSSSSSDDDDDDDDDDDDDDDDDDDDDDDD)
+  CM_SURF_FMT_DEPTH24F_STEN8,      ///< depth + stencil, 32 bits per element packed as (@c
+                                   ///< SSSSSSSSDDDDDDDDDDDDDDDDDDDDDDDD)
+  CM_SURF_FMT_DEPTH32F_X24_STEN8,  ///< depth + stencil, 64 bits per element packed as (@c
+                                   ///< XXXXXXXXXXXXXXXXXXXXXXXXSSSSSSSSDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD)
+  CM_SURF_FMT_DEPTH32F,        ///< depth, 32 bits per element packed as (@c
+                               ///< DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD)
+  CM_SURF_FMT_sR11_sG11_sB10,  ///< RGB,   32 bits per element packed as (@c
+                               ///< RRRRRRRRRRRGGGGGGGGGGGBBBBBBBBBB)
+  CM_SURF_FMT_sU16,            ///<
+  CM_SURF_FMT_sUV16,           ///<
+  CM_SURF_FMT_sUVWQ16,         ///<
+  CM_SURF_FMT_RG16,  ///< RG,    32 bits per element packed as (@c RRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGG)
+  CM_SURF_FMT_RG16F,     ///< RG,    32 bits per element packed as (@c
+                         ///< RRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGG)
+  CM_SURF_FMT_RG32F,     ///< RG,    64 bits per element packed as (@c
+                         ///< RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG)
+  CM_SURF_FMT_ABGR4,     ///< RGBA,  16 bits per element packed as (@c RRRRGGGGBBBBAAAA)
+  CM_SURF_FMT_A1_BGR5,   ///< RGBA,  16 bits per element packed as (@c RRRRRGGGGGBBBBBA)
+  CM_SURF_FMT_ABGR8,     ///< RGBA,  32 bits per element packed as (@c
+                         ///< RRRRRRRRGGGGGGGGBBBBBBBBAAAAAAAA)
+  CM_SURF_FMT_A2_BGR10,  ///< RGBA,  32 bits per element packed as (@c
+                         ///< RRRRRRRRRRGGGGGGGGGGBBBBBBBBBBAA)
+  CM_SURF_FMT_ABGR16,    ///< RGBA,  64 bits per element packed as (@c
+                         ///< RRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBAAAAAAAAAAAAAAAA)
+  CM_SURF_FMT_ABGR16F,   ///< RGBA,  64 bits per element packed as (@c
+                         ///< RRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBAAAAAAAAAAAAAAAA)
+  CM_SURF_FMT_ABGR32F,   ///< RGBA, 128 bits per element packed as (@c
+                        ///< RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA)
+  CM_SURF_FMT_DXT1A,
+  CM_SURF_FMT_sRGB10_A2,  ///< RGBA,  32  bits per element packed as signed (@c
+                          ///< AABBBBBBBBBBGGGGGGGGGGRRRRRRRRRR)
+  CM_SURF_FMT_sR8,        ///< R,     8   bits per element packed as signed (@c RRRRRRRR)
+  CM_SURF_FMT_sRG8,       ///< RG,    16  bits per element packed as signed (@c RRRRRRRRGGGGGGGG)
+  CM_SURF_FMT_sR32I,      ///< R,     32  bits per element packed as signed (@c
+                          ///< RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_sRG32I,     ///< RG,    64  bits per element packed as signed (@c
+                          ///< RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG)
+  CM_SURF_FMT_sRGBA32I,   ///< RGBA,  128 bits per element packed as signed (@c
+                         ///< RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA)
+  CM_SURF_FMT_R32I,    ///< R,     32  bits per element packed as (@c
+                       ///< RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_RG32I,   ///< RG,    64  bits per element packed as (@c
+                       ///< RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG)
+  CM_SURF_FMT_RG8,     ///< RG8,   16 bits per element packed as (@c RRRRRRRRGGGGGGGG)
+  CM_SURF_FMT_sRGBA8,  ///< RGBA8, 32 bits per element packed as signed (@c
+                       ///< RRRRRRRRGGGGGGGGBBBBBBBBAAAAAAAA)
+  CM_SURF_FMT_R11F_G11F_B10F,                ///< RGB,   32 bits per element packed as (@c
+                                             ///< BBBBBBBBBBGGGGGGGGGGGRRRRRRRRRRR)
+  CM_SURF_FMT_RGB9_E5,                       ///< RGB,   32 bits per element packed as (@c
+                                             ///< EEEEEBBBBBBBBBGGGGGGGGGRRRRRRRRR)
+  CM_SURF_FMT_LUMINANCE_LATC1,               ///< compressed LATC1
+  CM_SURF_FMT_SIGNED_LUMINANCE_LATC1,        ///< compressed signed LATC1
+  CM_SURF_FMT_LUMINANCE_ALPHA_LATC2,         ///< compressed LATC2
+  CM_SURF_FMT_SIGNED_LUMINANCE_ALPHA_LATC2,  ///< compressed signed LATC2
+  CM_SURF_FMT_RED_RGTC1,                     ///< compressed RGTC1
+  CM_SURF_FMT_SIGNED_RED_RGTC1,              ///< compressed signed RGTC1
+  CM_SURF_FMT_RED_GREEN_RGTC2,               ///< compressed RGTC2
+  CM_SURF_FMT_SIGNED_RED_GREEN_RGTC2,        ///< compressed signed RGTC2
+  CM_SURF_FMT_R8,                            ///< R,     8   bits per element packed (@c RRRRRRRR)
+  CM_SURF_FMT_R16,     ///< R,    16   bits per element packed (@c RRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_R16F,    ///< R,    16   bits per element packed (@c RRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_R32F,    ///< R,    32   bits per element packed (@c RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_R8I,     ///< R,     8   bits per element packed (@c RRRRRRRR)
+  CM_SURF_FMT_sR8I,    ///< R,     8   bits per element packed as signed (@c RRRRRRRR)
+  CM_SURF_FMT_RG8I,    ///< RG,   16   bits per element packed (@c RRRRRRRRGGGGGGGG)
+  CM_SURF_FMT_sRG8I,   ///< RG,   16   bits per element packed as signed (@c RRRRRRRRGGGGGGGG)
+  CM_SURF_FMT_R16I,    ///< R,    16   bits per element packed (@c RRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_sR16I,   ///< R,    16   bits per element packed as signed (@c RRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_RG16I,   ///< RG,   32   bits per element packed (@c RRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGG)
+  CM_SURF_FMT_sRG16I,  ///< RG,   32   bits per element packed as signed (@c
+                       ///< RRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGG)
+  CM_SURF_FMT_RGBA32UI,  ///< RGBA, 128 bits per element packed as (@c
+                         ///< AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAARRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB)
+  CM_SURF_FMT_RGBX32UI,  ///< RGBX,  128 bits per element packed as(@c
+                         ///< XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB)
+  CM_SURF_FMT_ALPHA32UI,            ///< Alpha, 32 bits per element packed as (@c
+                                    ///< AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA)
+  CM_SURF_FMT_INTENSITY32UI,        ///< Intensity, 32 bits per element packed as (@c
+                                    ///< IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII)
+  CM_SURF_FMT_LUMINANCE32UI,        ///< Luminance, 32 bits per element packed as (@c
+                                    ///< LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_LUMINANCE_ALPHA32UI,  ///< Luminance Alpha, 64 bits per element packed as (@c
+                                    ///< AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_RGBA16UI,       ///< RGBA,  64 bits per element packed as (@c
+                              ///< AAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_RGBX16UI,       ///< RGB,   64 bits per element packed as (@c
+                              ///< XXXXXXXXXXXXXXXXBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_ALPHA16UI,      ///< Alpha, 16 bits per element packed as (@c AAAAAAAAAAAAAAAA)
+  CM_SURF_FMT_INTENSITY16UI,  ///< Intensity, 16 bits per element packed as (@c IIIIIIIIIIIIIIII)
+  CM_SURF_FMT_LUMINANCE16UI,  ///< Luminance, 16 bits per element packed as (@c LLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_LUMINANCE_ALPHA16UI,  ///< Luminance Alpha, 32 bits per element packed as (@c
+                                    ///< AAAAAAAAAAAAAAAALLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_RGBA8UI,              ///< RGBA,  32 bits per element packed as (@c
+                                    ///< AAAAAAAABBBBBBBBGGGGGGGGRRRRRRRR)
+  CM_SURF_FMT_RGBX8UI,              ///< RGB,   32 bits per element packed as (@c
+                                    ///< XXXXXXXXBBBBBBBBGGGGGGGGRRRRRRRR)
+  CM_SURF_FMT_ALPHA8UI,             ///< Alpha, 8 bits per element packed as (@c AAAAAAAA)
+  CM_SURF_FMT_INTENSITY8UI,         ///< Intensity, 8 bits per element packed as (@c IIIIIIII)
+  CM_SURF_FMT_LUMINANCE8UI,         ///< Luminance, 8 bits per element packed as (@c LLLLLLLL)
+  CM_SURF_FMT_LUMINANCE_ALPHA8UI,   ///< Luminance Alpha, 16 bits per element packed as (@c
+                                    ///< AAAAAAAALLLLLLLL)
+  CM_SURF_FMT_sRGBX32I,             ///< RGBX,  128 bits per element packed as(@c
+                         ///< XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB)
+  CM_SURF_FMT_sALPHA32I,            ///< Alpha, 32 bits per element packed as (@c
+                                    ///< AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA)
+  CM_SURF_FMT_sINTENSITY32I,        ///< Intensity, 32 bits per element packed as (@c
+                                    ///< IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII)
+  CM_SURF_FMT_sLUMINANCE32I,        ///< Luminance, 32 bits per element packed as (@c
+                                    ///< LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_sLUMINANCE_ALPHA32I,  ///< Luminance Alpha, 64 bits per element packed as (@c
+                                    ///< AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAALLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_sRGBA16I,       ///< RGBA,  64 bits per element packed as (@c
+                              ///< AAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_sRGBX16I,       ///< RGB,   64 bits per element packed as (@c
+                              ///< XXXXXXXXXXXXXXXXBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_sALPHA16I,      ///< Alpha, 16 bits per element packed as (@c AAAAAAAAAAAAAAAA)
+  CM_SURF_FMT_sINTENSITY16I,  ///< Intensity, 16 bits per element packed as (@c IIIIIIIIIIIIIIII)
+  CM_SURF_FMT_sLUMINANCE16I,  ///< Luminance, 16 bits per element packed as (@c LLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_sLUMINANCE_ALPHA16I,  ///< Luminance Alpha, 32 bits per element packed as (@c
+                                    ///< AAAAAAAAAAAAAAAALLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_sRGBA8I,              ///< RGBA,  32 bits per element packed as (@c
+                                    ///< AAAAAAAABBBBBBBBGGGGGGGGRRRRRRRR)
+  CM_SURF_FMT_sRGBX8I,              ///< RGB,   32 bits per element packed as (@c
+                                    ///< XXXXXXXXBBBBBBBBGGGGGGGGRRRRRRRR)
+  CM_SURF_FMT_sALPHA8I,             ///< Alpha, 8 bits per element packed as (@c AAAAAAAA)
+  CM_SURF_FMT_sINTENSITY8I,         ///< Intensity, 8 bits per element packed as (@c IIIIIIII)
+  CM_SURF_FMT_sLUMINANCE8I,         ///< Luminance, 8 bits per element packed as (@c LLLLLLLL)
+  CM_SURF_FMT_sLUMINANCE_ALPHA8I,   ///< Luminance Alpha, 16 bits per element (@c AAAAAAAALLLLLLLL)
+  CM_SURF_FMT_sDXT6,                ///< compressed, CM_SURF_FMT_sDXT6
+  CM_SURF_FMT_DXT6,                 ///< compressed, CM_SURF_FMT_DXT6
+  CM_SURF_FMT_DXT7,                 ///< compressed, DXT7
+  CM_SURF_FMT_LUMINANCE8_SNORM,   ///< Luminance,  8 bits per element packed as signed (@c LLLLLLLL)
+  CM_SURF_FMT_LUMINANCE16_SNORM,  ///< Luminance, 16 bits per element packed as signed (@c
+                                  ///< LLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_INTENSITY8_SNORM,   ///< Intensity,  8 bits per element packed as signed (@c IIIIIIII)
+  CM_SURF_FMT_INTENSITY16_SNORM,  ///< Intensity, 16 bits per element packed as signed (@c
+                                  ///< IIIIIIIIIIIIIIII)
+  CM_SURF_FMT_ALPHA8_SNORM,       ///< Alpha,      8 bits per element packed as signed (@c AAAAAAAA)
+  CM_SURF_FMT_ALPHA16_SNORM,      ///< Alpha,     16 bits per element packed as signed (@c
+                                  ///< AAAAAAAAAAAAAAAA)
+  CM_SURF_FMT_LUMINANCE_ALPHA8_SNORM,   ///< Luminance Alpha, 16 bits per element packed as signed
+                                        ///< (@c AAAAAAAALLLLLLLL)
+  CM_SURF_FMT_LUMINANCE_ALPHA16_SNORM,  ///< Luminance Alpha, 32 bits per element packed as signed
+                                        ///< (@c AAAAAAAAAAAAAAAALLLLLLLLLLLLLLLL)
+  CM_SURF_FMT_R8_SNORM,      ///< R,     8   bits per element packed as signed (@c RRRRRRRR)
+  CM_SURF_FMT_R16_SNORM,     ///< R,    16   bits per element packed as signed (@c RRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_RG8_SNORM,     ///< RG8,   16 bits per element packed as signed (@c RRRRRRRRGGGGGGGG)
+  CM_SURF_FMT_RG16_SNORM,    ///< RG,    32 bits per element packed as signed (@c
+                             ///< RRRRRRRRRRRRRRRRGGGGGGGGGGGGGGGG)
+  CM_SURF_FMT_RGBX8_SNORM,   ///< RGB,   32 bits per element packed as signed (@c
+                             ///< XXXXXXXXBBBBBBBBGGGGGGGGRRRRRRRR)
+  CM_SURF_FMT_RGBX16_SNORM,  ///< RGB,   64 bits per element packed as signed (@c
+                             ///< XXXXXXXXXXXXXXXXBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_RGBA8_SNORM,   ///< RGBA,  32 bits per element packed as signed (@c
+                             ///< AAAAAAAABBBBBBBBGGGGGGGGRRRRRRRR)
+  CM_SURF_FMT_RGBA16_SNORM,  ///< RGBA,  64 bits per element packed as signed (@c
+                             ///< AAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_RGB10_A2UI,    ///< RGBA,  32 bits per element packed as (@c
+                             ///< AABBBBBBBBBBGGGGGGGGGGRRRRRRRRRR)
+  CM_SURF_FMT_RGB32F,        ///< RGB, float, 96 bits per element packed as (@c
+                       ///< BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_RGB32I,  ///< RGB, unnormalized int, 96 bits per element packed as (@c
+                       ///< BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_RGB32UI,  ///< RGB, unnormalized uint, 96 bits per element packed as (@c
+                        ///< BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR)
+  CM_SURF_FMT_RGBX8_SRGB,             ///< RGB,   32 bits per element packed as (@c
+                                      ///< XXXXXXXXBBBBBBBBGGGGGGGGRRRRRRRR)
+  CM_SURF_FMT_RGBA8_SRGB,             ///< RGBA,  32 bits per element packed as (@c
+                                      ///< AAAAAAAABBBBBBBBGGGGGGGGRRRRRRRR)
+  CM_SURF_FMT_DXT1_SRGB,              ///< compressed, DXT1
+  CM_SURF_FMT_DXT1A_SRGB,             ///<
+  CM_SURF_FMT_DXT2_3_SRGB,            ///< compressed, DXT2_3
+  CM_SURF_FMT_DXT4_5_SRGB,            ///< compressed, DXT4_5
+  CM_SURF_FMT_DXT7_SRGB,              ///< compressed, DXT7
+  CM_SURF_FMT_RGB8_ETC2,              ///< ETC2 compressed, RGB8 in 64 bits
+  CM_SURF_FMT_SRGB8_ETC2,             ///< ETC2 compressed, SRGB8 in 64 bits
+  CM_SURF_FMT_RGB8_PT_ALPHA1_ETC2,    ///< ETC2 compressed, RGB8A1 in 64 bits
+  CM_SURF_FMT_SRGB8_PT_ALPHA1_ETC2,   ///< ETC2 compressed, sRGB8A1 in 64 bits
+  CM_SURF_FMT_RGBA8_ETC2_EAC,         ///< ETC2 compressed, RGBA8 in 128 bits
+  CM_SURF_FMT_SRGB8_ALPHA8_ETC2_EAC,  ///< ETC2 compressed, sRGBA8 in 128 bits
+  CM_SURF_FMT_R11_EAC,                ///< EAC compressed, R11 in 64 bits
+  CM_SURF_FMT_SIGNED_R11_EAC,         ///< EAC compressed, signed R11 in 64 bits
+  CM_SURF_FMT_RG11_EAC,               ///< EAC compressed, RG11 in 128 bits
+  CM_SURF_FMT_SIGNED_RG11_EAC,        ///< EAC compressed, signed RG11 in 128 bits
 
-    CM_SURF_FMT_RGBA8_ASTC_4x4,                     ///< ASTC compressed RGBA8 in 128 bits block
-    CM_SURF_FMT_RGBA8_ASTC_5x4,                     ///< ASTC compressed RGBA8 in 128 bits block
-    CM_SURF_FMT_RGBA8_ASTC_5x5,                     ///< ASTC compressed RGBA8 in 128 bits block
-    CM_SURF_FMT_RGBA8_ASTC_6x5,                     ///< ASTC compressed RGBA8 in 128 bits block
-    CM_SURF_FMT_RGBA8_ASTC_6x6,                     ///< ASTC compressed RGBA8 in 128 bits block
-    CM_SURF_FMT_RGBA8_ASTC_8x5,                     ///< ASTC compressed RGBA8 in 128 bits block
-    CM_SURF_FMT_RGBA8_ASTC_8x6,                     ///< ASTC compressed RGBA8 in 128 bits block
-    CM_SURF_FMT_RGBA8_ASTC_8x8,                     ///< ASTC compressed RGBA8 in 128 bits block
-    CM_SURF_FMT_RGBA8_ASTC_10x5,                     ///< ASTC compressed RGBA8 in 128 bits block
-    CM_SURF_FMT_RGBA8_ASTC_10x6,                     ///< ASTC compressed RGBA8 in 128 bits block
-    CM_SURF_FMT_RGBA8_ASTC_10x8,                     ///< ASTC compressed RGBA8 in 128 bits block
-    CM_SURF_FMT_RGBA8_ASTC_10x10,                     ///< ASTC compressed RGBA8 in 128 bits block
-    CM_SURF_FMT_RGBA8_ASTC_12x10,                     ///< ASTC compressed RGBA8 in 128 bits block
-    CM_SURF_FMT_RGBA8_ASTC_12x12,                     ///< ASTC compressed RGBA8 in 128 bits block
+  CM_SURF_FMT_RGBA8_ASTC_4x4,    ///< ASTC compressed RGBA8 in 128 bits block
+  CM_SURF_FMT_RGBA8_ASTC_5x4,    ///< ASTC compressed RGBA8 in 128 bits block
+  CM_SURF_FMT_RGBA8_ASTC_5x5,    ///< ASTC compressed RGBA8 in 128 bits block
+  CM_SURF_FMT_RGBA8_ASTC_6x5,    ///< ASTC compressed RGBA8 in 128 bits block
+  CM_SURF_FMT_RGBA8_ASTC_6x6,    ///< ASTC compressed RGBA8 in 128 bits block
+  CM_SURF_FMT_RGBA8_ASTC_8x5,    ///< ASTC compressed RGBA8 in 128 bits block
+  CM_SURF_FMT_RGBA8_ASTC_8x6,    ///< ASTC compressed RGBA8 in 128 bits block
+  CM_SURF_FMT_RGBA8_ASTC_8x8,    ///< ASTC compressed RGBA8 in 128 bits block
+  CM_SURF_FMT_RGBA8_ASTC_10x5,   ///< ASTC compressed RGBA8 in 128 bits block
+  CM_SURF_FMT_RGBA8_ASTC_10x6,   ///< ASTC compressed RGBA8 in 128 bits block
+  CM_SURF_FMT_RGBA8_ASTC_10x8,   ///< ASTC compressed RGBA8 in 128 bits block
+  CM_SURF_FMT_RGBA8_ASTC_10x10,  ///< ASTC compressed RGBA8 in 128 bits block
+  CM_SURF_FMT_RGBA8_ASTC_12x10,  ///< ASTC compressed RGBA8 in 128 bits block
+  CM_SURF_FMT_RGBA8_ASTC_12x12,  ///< ASTC compressed RGBA8 in 128 bits block
 
-    CM_SURF_FMT_SRGBA8_ASTC_4x4,                     ///< ASTC compressed SRGBA8 in 128 bits block
-    CM_SURF_FMT_SRGBA8_ASTC_5x4,                     ///< ASTC compressed SRGBA8 in 128 bits block
-    CM_SURF_FMT_SRGBA8_ASTC_5x5,                     ///< ASTC compressed SRGBA8 in 128 bits block
-    CM_SURF_FMT_SRGBA8_ASTC_6x5,                     ///< ASTC compressed SRGBA8 in 128 bits block
-    CM_SURF_FMT_SRGBA8_ASTC_6x6,                     ///< ASTC compressed SRGBA8 in 128 bits block
-    CM_SURF_FMT_SRGBA8_ASTC_8x5,                     ///< ASTC compressed SRGBA8 in 128 bits block
-    CM_SURF_FMT_SRGBA8_ASTC_8x6,                     ///< ASTC compressed SRGBA8 in 128 bits block
-    CM_SURF_FMT_SRGBA8_ASTC_8x8,                     ///< ASTC compressed SRGBA8 in 128 bits block
-    CM_SURF_FMT_SRGBA8_ASTC_10x5,                     ///< ASTC compressed SRGBA8 in 128 bits block
-    CM_SURF_FMT_SRGBA8_ASTC_10x6,                     ///< ASTC compressed SRGBA8 in 128 bits block
-    CM_SURF_FMT_SRGBA8_ASTC_10x8,                     ///< ASTC compressed SRGBA8 in 128 bits block
-    CM_SURF_FMT_SRGBA8_ASTC_10x10,                     ///< ASTC compressed SRGBA8 in 128 bits block
-    CM_SURF_FMT_SRGBA8_ASTC_12x10,                     ///< ASTC compressed SRGBA8 in 128 bits block
-    CM_SURF_FMT_SRGBA8_ASTC_12x12,                     ///< ASTC compressed SRGBA8 in 128 bits block
+  CM_SURF_FMT_SRGBA8_ASTC_4x4,    ///< ASTC compressed SRGBA8 in 128 bits block
+  CM_SURF_FMT_SRGBA8_ASTC_5x4,    ///< ASTC compressed SRGBA8 in 128 bits block
+  CM_SURF_FMT_SRGBA8_ASTC_5x5,    ///< ASTC compressed SRGBA8 in 128 bits block
+  CM_SURF_FMT_SRGBA8_ASTC_6x5,    ///< ASTC compressed SRGBA8 in 128 bits block
+  CM_SURF_FMT_SRGBA8_ASTC_6x6,    ///< ASTC compressed SRGBA8 in 128 bits block
+  CM_SURF_FMT_SRGBA8_ASTC_8x5,    ///< ASTC compressed SRGBA8 in 128 bits block
+  CM_SURF_FMT_SRGBA8_ASTC_8x6,    ///< ASTC compressed SRGBA8 in 128 bits block
+  CM_SURF_FMT_SRGBA8_ASTC_8x8,    ///< ASTC compressed SRGBA8 in 128 bits block
+  CM_SURF_FMT_SRGBA8_ASTC_10x5,   ///< ASTC compressed SRGBA8 in 128 bits block
+  CM_SURF_FMT_SRGBA8_ASTC_10x6,   ///< ASTC compressed SRGBA8 in 128 bits block
+  CM_SURF_FMT_SRGBA8_ASTC_10x8,   ///< ASTC compressed SRGBA8 in 128 bits block
+  CM_SURF_FMT_SRGBA8_ASTC_10x10,  ///< ASTC compressed SRGBA8 in 128 bits block
+  CM_SURF_FMT_SRGBA8_ASTC_12x10,  ///< ASTC compressed SRGBA8 in 128 bits block
+  CM_SURF_FMT_SRGBA8_ASTC_12x12,  ///< ASTC compressed SRGBA8 in 128 bits block
 
-    CM_SURF_FMT_BGR10_A2UI,                         ///< RGBA,  32 bits per element packed as (@c AARRRRRRRRRRGGGGGGGGGGBBBBBBBBBB)
-    CM_SURF_FMT_A2_BGR10UI,                         ///< RGBA,  32 bits per element packed as (@c RRRRRRRRRRGGGGGGGGGGBBBBBBBBBBAA)
-    CM_SURF_FMT_A2_RGB10UI,                         ///< RGBA,  32 bits per element packed as (@c BBBBBBBBBBGGGGGGGGGGRRRRRRRRRRAA)
-    CM_SURF_FMT_B5_G6_R5UI,                         ///< RGB,   16 bits per element packed as (@c BBBBBGGGGGGRRRRR)
-    CM_SURF_FMT_R5_G6_B5UI,                         ///< RGB,   16 bits per element packed as (@c RRRRRGGGGGGBBBBB)
+  CM_SURF_FMT_BGR10_A2UI,  ///< RGBA,  32 bits per element packed as (@c
+                           ///< AARRRRRRRRRRGGGGGGGGGGBBBBBBBBBB)
+  CM_SURF_FMT_A2_BGR10UI,  ///< RGBA,  32 bits per element packed as (@c
+                           ///< RRRRRRRRRRGGGGGGGGGGBBBBBBBBBBAA)
+  CM_SURF_FMT_A2_RGB10UI,  ///< RGBA,  32 bits per element packed as (@c
+                           ///< BBBBBBBBBBGGGGGGGGGGRRRRRRRRRRAA)
+  CM_SURF_FMT_B5_G6_R5UI,  ///< RGB,   16 bits per element packed as (@c BBBBBGGGGGGRRRRR)
+  CM_SURF_FMT_R5_G6_B5UI,  ///< RGB,   16 bits per element packed as (@c RRRRRGGGGGGBBBBB)
 
-    CM_SURF_FMT_DEPTH32F_X24_STEN8_UNCLAMPED,       ///< depth + stencil, 64 bits per element packed as (@c XXXXXXXXXXXXXXXXXXXXXXXXSSSSSSSSDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD)
-    CM_SURF_FMT_DEPTH32F_UNCLAMPED,                 ///< depth, 32 bits per element packed as (@c DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD)
+  CM_SURF_FMT_DEPTH32F_X24_STEN8_UNCLAMPED,  ///< depth + stencil, 64 bits per element packed as (@c
+                                             ///< XXXXXXXXXXXXXXXXXXXXXXXXSSSSSSSSDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD)
+  CM_SURF_FMT_DEPTH32F_UNCLAMPED,  ///< depth, 32 bits per element packed as (@c
+                                   ///< DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD)
 
-    CM_SURF_FMT_L8_X16_A8_SRGB,                     ///< Sluminance Alpha,  32 bits per element packed as (@c AAAAAAAAXXXXXXXXXXXXXXXXLLLLLLLL)
-    CM_SURF_FMT_L8_X24_SRGB,                        ///< Sluminance,        32 bits per element packed as (@c XXXXXXXXXXXXXXXXXXXXXXXXLLLLLLLL)
+  CM_SURF_FMT_L8_X16_A8_SRGB,  ///< Sluminance Alpha,  32 bits per element packed as (@c
+                               ///< AAAAAAAAXXXXXXXXXXXXXXXXLLLLLLLL)
+  CM_SURF_FMT_L8_X24_SRGB,     ///< Sluminance,        32 bits per element packed as (@c
+                               ///< XXXXXXXXXXXXXXXXXXXXXXXXLLLLLLLL)
 
-    CM_SURF_FMT_STENCIL8,                           ///< stencil, 32 bits per element packed as (@c SSSSSSSSXXXXXXXXXXXXXXXXXXXXXXXX)
+  CM_SURF_FMT_STENCIL8,  ///< stencil, 32 bits per element packed as (@c
+                         ///< SSSSSSSSXXXXXXXXXXXXXXXXXXXXXXXX)
 
 
-                                                    // non-native surface formats after this line, will be ignored by HWL
-                                                    // all non-native surface formats should use the _NN suffix to distinguish
-                                                    // them from potential corresponding native formats added in the future
-    CM_SURF_FMT_I420_NN,                            ///< 4:2:0 Planar Y-U-V format
-    CM_SURF_FMT_YV12_NN,                            ///< 4:2:0 Planar Y-V-U format
-    CM_SURF_FMT_NV12_NN,                            ///< 4:2:0 Semi-planar Y-UV format
-    CM_SURF_FMT_NV21_NN,                            ///< 4:2:0 Semi-planar Y-VU format
-    cmSurfFmt_FIRST = CM_SURF_FMT_LUMINANCE8,       ///< First surface format
-    cmSurfFmt_LAST = CM_SURF_FMT_STENCIL8,        ///< Last native surface format
-    cmSurfFmt_LAST_NON_NATIVE = CM_SURF_FMT_NV21_NN,///< Last non-native surface format
+  // non-native surface formats after this line, will be ignored by HWL
+  // all non-native surface formats should use the _NN suffix to distinguish
+  // them from potential corresponding native formats added in the future
+  CM_SURF_FMT_I420_NN,                              ///< 4:2:0 Planar Y-U-V format
+  CM_SURF_FMT_YV12_NN,                              ///< 4:2:0 Planar Y-V-U format
+  CM_SURF_FMT_NV12_NN,                              ///< 4:2:0 Semi-planar Y-UV format
+  CM_SURF_FMT_NV21_NN,                              ///< 4:2:0 Semi-planar Y-VU format
+  cmSurfFmt_FIRST = CM_SURF_FMT_LUMINANCE8,         ///< First surface format
+  cmSurfFmt_LAST = CM_SURF_FMT_STENCIL8,            ///< Last native surface format
+  cmSurfFmt_LAST_NON_NATIVE = CM_SURF_FMT_NV21_NN,  ///< Last non-native surface format
 } cmSurfFmt;
 
 typedef struct cmFormatXlateRec {
-    cmSurfFmt          raw_cmFormat;
-    cl_channel_type    image_channel_data_type;
-    cl_channel_order   image_channel_order;
+  cmSurfFmt raw_cmFormat;                   ///< CM surface format this entry describes
+  cl_channel_type image_channel_data_type;  ///< OpenCL channel data type paired with the format
+  cl_channel_order image_channel_order;     ///< OpenCL channel order paired with the format
 } cmFormatXlateParams;
 
 // relates full range of cm surface formats to those supported by CAL
-static const   cmFormatXlateParams cmFormatXlateTable[] = {
-    { CM_SURF_FMT_LUMINANCE8,            CL_UNORM_INT8,          CL_LUMINANCE },
-    { CM_SURF_FMT_LUMINANCE16,           CL_UNORM_INT16,         CL_LUMINANCE },
-    { CM_SURF_FMT_LUMINANCE16F,          CL_HALF_FLOAT,          CL_LUMINANCE },
-    { CM_SURF_FMT_LUMINANCE32F,          CL_FLOAT,               CL_LUMINANCE },
-    { CM_SURF_FMT_INTENSITY8,            CL_UNORM_INT8,          CL_INTENSITY },
-    { CM_SURF_FMT_INTENSITY16,           CL_UNORM_INT16,         CL_INTENSITY },
-    { CM_SURF_FMT_INTENSITY16F,          CL_HALF_FLOAT,          CL_INTENSITY },
-    { CM_SURF_FMT_INTENSITY32F,          CL_FLOAT,               CL_INTENSITY },
-    { CM_SURF_FMT_ALPHA8,                CL_UNSIGNED_INT8,       CL_A         },
-    { CM_SURF_FMT_ALPHA16,               CL_UNORM_INT16,         CL_A         },
-    { CM_SURF_FMT_ALPHA16F,              CL_HALF_FLOAT,          CL_A         },
-    { CM_SURF_FMT_ALPHA32F,              CL_FLOAT,               CL_A         },
-    { CM_SURF_FMT_LUMINANCE8_ALPHA8,     CL_UNSIGNED_INT8,       CL_RG        },
-    { CM_SURF_FMT_LUMINANCE16_ALPHA16,   CL_UNSIGNED_INT16,      CL_RG        },
-    { CM_SURF_FMT_LUMINANCE16F_ALPHA16F, CL_HALF_FLOAT,          CL_RG        },
-    { CM_SURF_FMT_LUMINANCE32F_ALPHA32F, CL_FLOAT,               CL_RG        },
-    { CM_SURF_FMT_B2_G3_R3,              500,                    CL_R         },
-    { CM_SURF_FMT_B5_G6_R5,              CL_UNSIGNED_INT16,      CL_RGB       },
-    { CM_SURF_FMT_BGRX4,                 500,                    CL_BGRA      },
-    { CM_SURF_FMT_BGR5_X1,               CL_UNSIGNED_INT16,      CL_RGB       },
-    { CM_SURF_FMT_BGRX8,                 CL_UNORM_INT8,          CL_BGRA      },
-    { CM_SURF_FMT_BGR10_X2,              CL_UNORM_INT_101010,    CL_RGB       },
-    { CM_SURF_FMT_BGRX16,                CL_UNORM_INT16,         CL_BGRA      },
-    { CM_SURF_FMT_BGRX16F,               CL_HALF_FLOAT,          CL_BGRA      },
-    { CM_SURF_FMT_BGRX32F,               CL_FLOAT,               CL_BGRA      },
-    { CM_SURF_FMT_RGBX4,                 500,                    CL_RGB       },
-    { CM_SURF_FMT_RGB5_X1,               CL_UNORM_INT16,         CL_BGRA      },
-    { CM_SURF_FMT_RGBX8,                 CL_UNORM_INT8,          CL_RGBA      },
-    { CM_SURF_FMT_RGB10_X2,              CL_UNORM_INT_101010,    CL_RGBA      },
-    { CM_SURF_FMT_RGBX16,                CL_UNORM_INT16,         CL_RGBA      },
-    { CM_SURF_FMT_RGBX16F,               CL_HALF_FLOAT,          CL_RGBA      },
-    { CM_SURF_FMT_RGBX32F,               CL_FLOAT,               CL_RGBA      },
-    { CM_SURF_FMT_BGRA4,                 500,                    CL_BGRA      },
-    { CM_SURF_FMT_BGR5_A1,               CL_UNSIGNED_INT16,      CL_BGRA      },
-    { CM_SURF_FMT_BGRA8,                 CL_UNORM_INT8,          CL_BGRA      },
-    { CM_SURF_FMT_BGR10_A2,              500,                    CL_BGRA      },
-    { CM_SURF_FMT_BGRA16,                CL_UNORM_INT16,         CL_BGRA      },
-    { CM_SURF_FMT_BGRA16F,               CL_UNORM_INT16,         CL_BGRA      },
-    { CM_SURF_FMT_BGRA32F,               CL_FLOAT,               CL_BGRA      },
-    { CM_SURF_FMT_RGBA4,                 500,                    CL_RGBA      },
-    { CM_SURF_FMT_RGB5_A1,               CL_UNSIGNED_INT16,      CL_RGBA      },
-    { CM_SURF_FMT_RGBA8,                 CL_UNORM_INT8,          CL_RGBA      },
-    { CM_SURF_FMT_RGB10_A2,              CL_UNORM_INT_101010,    CL_RGB       },
-    { CM_SURF_FMT_RGBA16,                CL_UNORM_INT16,         CL_RGBA      },
-    { CM_SURF_FMT_RGBA16F,               CL_HALF_FLOAT,          CL_RGBA      },
-    { CM_SURF_FMT_RGBA32I,               CL_UNSIGNED_INT32,      CL_RGBA      },
-    { CM_SURF_FMT_RGBA32F,               CL_FLOAT,               CL_RGBA      },
-    { CM_SURF_FMT_DUDV8,                 CL_UNSIGNED_INT8,       CL_RG        },
-    { CM_SURF_FMT_DXT1,                  500,                    CL_R         },
-    { CM_SURF_FMT_DXT2_3,                500,                    CL_R         },
-    { CM_SURF_FMT_DXT4_5,                500,                    CL_R         },
-    { CM_SURF_FMT_ATI1N,                 500,                    CL_R         },
-    { CM_SURF_FMT_ATI2N,                 500,                    CL_R         },
-    { CM_SURF_FMT_DEPTH16,               CL_UNORM_INT16,         CL_DEPTH     },
-    { CM_SURF_FMT_DEPTH16F,              CL_HALF_FLOAT,          CL_DEPTH     },
-    { CM_SURF_FMT_DEPTH24_X8,            500,                    CL_DEPTH     },
-    { CM_SURF_FMT_DEPTH24F_X8,           500,                    CL_DEPTH     },
-    { CM_SURF_FMT_DEPTH24_STEN8,         CL_UNORM_INT24,         CL_DEPTH_STENCIL },
-    { CM_SURF_FMT_DEPTH24F_STEN8,        500,                    CL_DEPTH_STENCIL },
-    { CM_SURF_FMT_DEPTH32F_X24_STEN8,    CL_FLOAT,               CL_DEPTH_STENCIL },
-    { CM_SURF_FMT_DEPTH32F,              CL_FLOAT,               CL_DEPTH     },
-    { CM_SURF_FMT_sR11_sG11_sB10,        500,                    CL_R         },
-    { CM_SURF_FMT_sU16,                  CL_SNORM_INT16,         CL_R         },
-    { CM_SURF_FMT_sUV16,                 CL_SNORM_INT16,         CL_RG        },
-    { CM_SURF_FMT_sUVWQ16,               CL_SNORM_INT16,         CL_RGBA      },
-    { CM_SURF_FMT_RG16,                  CL_UNORM_INT16,         CL_RG        },
-    { CM_SURF_FMT_RG16F,                 CL_HALF_FLOAT,          CL_RG        },
-    { CM_SURF_FMT_RG32F,                 CL_FLOAT,               CL_RG        },
-    { CM_SURF_FMT_ABGR4,                 500,                    CL_ARGB      },
-    { CM_SURF_FMT_A1_BGR5,               CL_UNSIGNED_INT16,      CL_ARGB      },
-    { CM_SURF_FMT_ABGR8,                 CL_UNORM_INT8,          CL_ARGB      },
-    { CM_SURF_FMT_A2_BGR10,              CL_UNORM_INT_101010,    CL_RGB       },
-    { CM_SURF_FMT_ABGR16,                CL_UNORM_INT16,         CL_ARGB      },
-    { CM_SURF_FMT_ABGR16F,               CL_HALF_FLOAT,          CL_ARGB      },
-    { CM_SURF_FMT_ABGR32F,               CL_FLOAT,               CL_ARGB      },
-    { CM_SURF_FMT_DXT1A,                 500,                    CL_R         },
-    { CM_SURF_FMT_sRGB10_A2,             500,                    CL_RGBA      },
-    { CM_SURF_FMT_sR8,                   CL_SNORM_INT8,          CL_R         },
-    { CM_SURF_FMT_sRG8,                  CL_SNORM_INT8,          CL_RG        },
-    { CM_SURF_FMT_sR32I,                 CL_SIGNED_INT32,        CL_R         },
-    { CM_SURF_FMT_sRG32I,                CL_SIGNED_INT32,        CL_RG        },
-    { CM_SURF_FMT_sRGBA32I,              CL_SIGNED_INT32,        CL_RGBA      },
-    { CM_SURF_FMT_R32I,                  CL_UNSIGNED_INT32,      CL_R         },
-    { CM_SURF_FMT_RG32I,                 CL_UNSIGNED_INT32,      CL_RG        },
-    { CM_SURF_FMT_RG8,                   CL_UNORM_INT8,          CL_RG        },
-    { CM_SURF_FMT_sRGBA8,                CL_SNORM_INT8,          CL_RGBA      },
-    { CM_SURF_FMT_R11F_G11F_B10F,        500,                    CL_RGBA      },
-    { CM_SURF_FMT_RGB9_E5,               CL_UNORM_INT8,          CL_ARGB      },
-    { CM_SURF_FMT_LUMINANCE_LATC1,       500,                    CL_RGBA      },
-    { CM_SURF_FMT_SIGNED_LUMINANCE_LATC1,500,                    CL_RGBA      },
-    { CM_SURF_FMT_LUMINANCE_ALPHA_LATC2, 500,                    CL_RGBA      },
-    { CM_SURF_FMT_SIGNED_LUMINANCE_ALPHA_LATC2, 500,             CL_RGBA      },
-    { CM_SURF_FMT_RED_RGTC1,             500,                    CL_RGBA      },
-    { CM_SURF_FMT_SIGNED_RED_RGTC1,      500,                    CL_RGBA      },
-    { CM_SURF_FMT_RED_GREEN_RGTC2,       500,                    CL_RGBA      },
-    { CM_SURF_FMT_SIGNED_RED_GREEN_RGTC2,500,                    CL_RGBA      },
-    { CM_SURF_FMT_R8,                    CL_UNORM_INT8,          CL_R         },
-    { CM_SURF_FMT_R16,                   CL_UNORM_INT16,         CL_R         },
-    { CM_SURF_FMT_R16F,                  CL_HALF_FLOAT,          CL_R         },
-    { CM_SURF_FMT_R32F,                  CL_FLOAT,               CL_R         },
-    { CM_SURF_FMT_R8I,                   CL_UNSIGNED_INT8,       CL_R         },
-    { CM_SURF_FMT_sR8I,                  CL_SIGNED_INT8,         CL_R         },
-    { CM_SURF_FMT_RG8I,                  CL_UNSIGNED_INT8,       CL_RG        },
-    { CM_SURF_FMT_sRG8I,                 CL_SIGNED_INT8,         CL_RG        },
-    { CM_SURF_FMT_R16I,                  CL_UNSIGNED_INT16,      CL_R         },
-    { CM_SURF_FMT_sR16I,                 CL_SIGNED_INT16,        CL_R         },
-    { CM_SURF_FMT_RG16I,                 CL_UNSIGNED_INT16,      CL_RG        },
-    { CM_SURF_FMT_sRG16I,                CL_SIGNED_INT16,        CL_RG        },
-    { CM_SURF_FMT_RGBA32UI,              CL_UNSIGNED_INT32,      CL_RGBA      },
-    { CM_SURF_FMT_RGBX32UI,              CL_UNSIGNED_INT32,      CL_RGBA      },
-    { CM_SURF_FMT_ALPHA32UI,             CL_UNSIGNED_INT32,      CL_R         },
-    { CM_SURF_FMT_INTENSITY32UI,         CL_UNSIGNED_INT32,      CL_R         },
-    { CM_SURF_FMT_LUMINANCE32UI,         CL_UNSIGNED_INT32,      CL_R         },
-    { CM_SURF_FMT_LUMINANCE_ALPHA32UI,   CL_UNSIGNED_INT32,      CL_RG        },
-    { CM_SURF_FMT_RGBA16UI,              CL_UNSIGNED_INT16,      CL_RGBA      },
-    { CM_SURF_FMT_RGBX16UI,              CL_UNSIGNED_INT16,      CL_RGBA      },
-    { CM_SURF_FMT_ALPHA16UI,             CL_UNSIGNED_INT16,      CL_R         },
-    { CM_SURF_FMT_INTENSITY16UI,         CL_UNSIGNED_INT16,      CL_R         },
-    { CM_SURF_FMT_LUMINANCE16UI,         CL_UNSIGNED_INT16,      CL_R         },
-    { CM_SURF_FMT_LUMINANCE_ALPHA16UI,   CL_UNSIGNED_INT32,      CL_RG        },
-    { CM_SURF_FMT_RGBA8UI,               CL_UNSIGNED_INT8,       CL_RGBA      },
-    { CM_SURF_FMT_RGBX8UI,               CL_UNORM_INT8,          CL_RGBA      },
-    { CM_SURF_FMT_ALPHA8UI,              CL_UNSIGNED_INT8,       CL_R         },
-    { CM_SURF_FMT_INTENSITY8UI,          CL_UNSIGNED_INT8,       CL_R         },
-    { CM_SURF_FMT_LUMINANCE8UI,          CL_UNSIGNED_INT8,       CL_R         },
-    { CM_SURF_FMT_LUMINANCE_ALPHA8UI,    CL_UNSIGNED_INT8,       CL_RG        },
-    { CM_SURF_FMT_sRGBX32I,              CL_SIGNED_INT32,        CL_RGBA      },
-    { CM_SURF_FMT_sALPHA32I,             CL_SIGNED_INT32,        CL_R         },
-    { CM_SURF_FMT_sINTENSITY32I,         CL_SIGNED_INT32,        CL_R         },
-    { CM_SURF_FMT_sLUMINANCE32I,         CL_SIGNED_INT32,        CL_R         },
-    { CM_SURF_FMT_sLUMINANCE_ALPHA32I,   CL_SIGNED_INT32,        CL_RG        },
-    { CM_SURF_FMT_sRGBA16I,              CL_SIGNED_INT16,        CL_RGBA      },
-    { CM_SURF_FMT_sRGBX16I,              CL_SIGNED_INT16,        CL_RGBA      },
-    { CM_SURF_FMT_sALPHA16I,             CL_SIGNED_INT16,        CL_R         },
-    { CM_SURF_FMT_sINTENSITY16I,         CL_SIGNED_INT16,        CL_R         },
-    { CM_SURF_FMT_sLUMINANCE16I,         CL_SIGNED_INT16,        CL_R         },
-    { CM_SURF_FMT_sLUMINANCE_ALPHA16I,   CL_SIGNED_INT16,        CL_RG        },
-    { CM_SURF_FMT_sRGBA8I,               CL_SIGNED_INT8,         CL_RGBA      },
-    { CM_SURF_FMT_sRGBX8I,               CL_SIGNED_INT8,         CL_RGBA      },
-    { CM_SURF_FMT_sALPHA8I,              CL_SIGNED_INT8,         CL_R         },
-    { CM_SURF_FMT_sINTENSITY8I,          CL_SIGNED_INT8,         CL_R         },
-    { CM_SURF_FMT_sLUMINANCE8I,          CL_SIGNED_INT8,         CL_R         },
-    { CM_SURF_FMT_sLUMINANCE_ALPHA8I,    CM_SURF_FMT_sRG8I,      CL_RG        },
-    { CM_SURF_FMT_sDXT6,                 500,                    CL_R         },
-    { CM_SURF_FMT_DXT6,                  500,                    CL_R         },
-    { CM_SURF_FMT_DXT7,                  500,                    CL_R         },
-    { CM_SURF_FMT_LUMINANCE8_SNORM,      CL_SNORM_INT8,          CL_R         },
-    { CM_SURF_FMT_LUMINANCE16_SNORM,     CL_SNORM_INT16,         CL_R         },
-    { CM_SURF_FMT_INTENSITY8_SNORM,      CL_SNORM_INT8,          CL_R         },
-    { CM_SURF_FMT_INTENSITY16_SNORM,     CL_SNORM_INT16,         CL_R         },
-    { CM_SURF_FMT_ALPHA8_SNORM,          CL_SNORM_INT8,          CL_R         },
-    { CM_SURF_FMT_ALPHA16_SNORM,         CL_SNORM_INT16,         CL_R         },
-    { CM_SURF_FMT_LUMINANCE_ALPHA8_SNORM,CL_SNORM_INT8,          CL_RG        },
-    { CM_SURF_FMT_LUMINANCE_ALPHA16_SNORM,CL_SNORM_INT16,        CL_RG        },
-    { CM_SURF_FMT_R8_SNORM,               CL_SNORM_INT8,         CL_R         },
-    { CM_SURF_FMT_R16_SNORM,              CL_SNORM_INT16,        CL_R         },
-    { CM_SURF_FMT_RG8_SNORM,              CL_SNORM_INT8,         CL_RG        },
-    { CM_SURF_FMT_RG16_SNORM,             CL_SNORM_INT16,        CL_RG        },
-    { CM_SURF_FMT_RGBX8_SNORM,            CL_SNORM_INT8,         CL_RGBA      },
-    { CM_SURF_FMT_RGBX16_SNORM,           CL_SNORM_INT16,        CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_SNORM,            CL_SNORM_INT8,         CL_RGBA      },
-    { CM_SURF_FMT_RGBA16_SNORM,           CL_SNORM_INT16,        CL_RGBA      },
-    { CM_SURF_FMT_RGB10_A2UI,             500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGB32F,                 500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGB32I,                 500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGB32UI,                500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBX8_SRGB,             500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_SRGB,             500,                   CL_RGBA      },
-    { CM_SURF_FMT_DXT1_SRGB,              500,                   CL_RGBA      },
-    { CM_SURF_FMT_DXT1A_SRGB,             500,                   CL_RGBA      },
-    { CM_SURF_FMT_DXT2_3_SRGB,            500,                   CL_RGBA      },
-    { CM_SURF_FMT_DXT4_5_SRGB,            500,                   CL_RGBA      },
-    { CM_SURF_FMT_DXT7_SRGB,              500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGB8_ETC2,              500,                   CL_RGB       },
-    { CM_SURF_FMT_SRGB8_ETC2,             500,                   CL_RGB       },
-    { CM_SURF_FMT_RGB8_PT_ALPHA1_ETC2,    500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGB8_PT_ALPHA1_ETC2,   500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_ETC2_EAC,         500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGB8_ALPHA8_ETC2_EAC,  500,                   CL_RGBA      },
-    { CM_SURF_FMT_R11_EAC,                500,                   CL_R         },
-    { CM_SURF_FMT_SIGNED_R11_EAC,         500,                   CL_R         },
-    { CM_SURF_FMT_RG11_EAC,               500,                   CL_RG        },
-    { CM_SURF_FMT_SIGNED_RG11_EAC,        500,                   CL_RG        },
-    { CM_SURF_FMT_RGBA8_ASTC_4x4,         500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_ASTC_5x4,         500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_ASTC_5x5,         500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_ASTC_6x5,         500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_ASTC_6x6,         500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_ASTC_8x5,         500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_ASTC_8x6,         500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_ASTC_8x8,         500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_ASTC_10x5,        500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_ASTC_10x6,        500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_ASTC_10x8,        500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_ASTC_10x10,       500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_ASTC_12x10,       500,                   CL_RGBA      },
-    { CM_SURF_FMT_RGBA8_ASTC_12x12,       500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGBA8_ASTC_4x4,        500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGBA8_ASTC_5x4,        500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGBA8_ASTC_5x5,        500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGBA8_ASTC_6x5,        500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGBA8_ASTC_6x6,        500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGBA8_ASTC_8x5,        500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGBA8_ASTC_8x6,        500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGBA8_ASTC_8x8,        500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGBA8_ASTC_10x5,       500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGBA8_ASTC_10x6,       500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGBA8_ASTC_10x8,       500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGBA8_ASTC_10x10,      500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGBA8_ASTC_12x10,      500,                   CL_RGBA      },
-    { CM_SURF_FMT_SRGBA8_ASTC_12x12,      500,                   CL_RGBA      },
-    { CM_SURF_FMT_BGR10_A2UI,             500,                   CL_BGRA      },
-    { CM_SURF_FMT_A2_BGR10UI,             500,                   CL_ARGB      },
-    { CM_SURF_FMT_A2_RGB10UI,             500,                   CL_ABGR      },
-    { CM_SURF_FMT_B5_G6_R5UI,             500,                   CL_BGRA      },
-    { CM_SURF_FMT_R5_G6_B5UI,             500,                   CL_RGBA      },
-    { CM_SURF_FMT_DEPTH32F_X24_STEN8_UNCLAMPED,CL_UNSIGNED_INT32, CL_R        },
-    { CM_SURF_FMT_DEPTH32F_UNCLAMPED,     CL_FLOAT,               CL_R        },
-    { CM_SURF_FMT_L8_X16_A8_SRGB,         500,                    CL_RGBA     },
-    { CM_SURF_FMT_L8_X24_SRGB,            500,                    CL_RGBA     },
-    { CM_SURF_FMT_STENCIL8,               CL_UNSIGNED_INT8,       CL_R        },
+static const cmFormatXlateParams cmFormatXlateTable[] = {  // 500 = not supported by OCL
+    {CM_SURF_FMT_LUMINANCE8, CL_UNORM_INT8, CL_LUMINANCE},
+    {CM_SURF_FMT_LUMINANCE16, CL_UNORM_INT16, CL_LUMINANCE},
+    {CM_SURF_FMT_LUMINANCE16F, CL_HALF_FLOAT, CL_LUMINANCE},
+    {CM_SURF_FMT_LUMINANCE32F, CL_FLOAT, CL_LUMINANCE},
+    {CM_SURF_FMT_INTENSITY8, CL_UNORM_INT8, CL_INTENSITY},
+    {CM_SURF_FMT_INTENSITY16, CL_UNORM_INT16, CL_INTENSITY},
+    {CM_SURF_FMT_INTENSITY16F, CL_HALF_FLOAT, CL_INTENSITY},
+    {CM_SURF_FMT_INTENSITY32F, CL_FLOAT, CL_INTENSITY},
+    {CM_SURF_FMT_ALPHA8, CL_UNSIGNED_INT8, CL_A},
+    {CM_SURF_FMT_ALPHA16, CL_UNORM_INT16, CL_A},
+    {CM_SURF_FMT_ALPHA16F, CL_HALF_FLOAT, CL_A},
+    {CM_SURF_FMT_ALPHA32F, CL_FLOAT, CL_A},
+    {CM_SURF_FMT_LUMINANCE8_ALPHA8, CL_UNSIGNED_INT8, CL_RG},
+    {CM_SURF_FMT_LUMINANCE16_ALPHA16, CL_UNSIGNED_INT16, CL_RG},
+    {CM_SURF_FMT_LUMINANCE16F_ALPHA16F, CL_HALF_FLOAT, CL_RG},
+    {CM_SURF_FMT_LUMINANCE32F_ALPHA32F, CL_FLOAT, CL_RG},
+    {CM_SURF_FMT_B2_G3_R3, 500, CL_R},
+    {CM_SURF_FMT_B5_G6_R5, CL_UNSIGNED_INT16, CL_RGB},
+    {CM_SURF_FMT_BGRX4, 500, CL_BGRA},
+    {CM_SURF_FMT_BGR5_X1, CL_UNSIGNED_INT16, CL_RGB},
+    {CM_SURF_FMT_BGRX8, CL_UNORM_INT8, CL_BGRA},
+    {CM_SURF_FMT_BGR10_X2, CL_UNORM_INT_101010, CL_RGB},
+    {CM_SURF_FMT_BGRX16, CL_UNORM_INT16, CL_BGRA},
+    {CM_SURF_FMT_BGRX16F, CL_HALF_FLOAT, CL_BGRA},
+    {CM_SURF_FMT_BGRX32F, CL_FLOAT, CL_BGRA},
+    {CM_SURF_FMT_RGBX4, 500, CL_RGB},
+    {CM_SURF_FMT_RGB5_X1, CL_UNORM_INT16, CL_BGRA},
+    {CM_SURF_FMT_RGBX8, CL_UNORM_INT8, CL_RGBA},
+    {CM_SURF_FMT_RGB10_X2, CL_UNORM_INT_101010, CL_RGBA},
+    {CM_SURF_FMT_RGBX16, CL_UNORM_INT16, CL_RGBA},
+    {CM_SURF_FMT_RGBX16F, CL_HALF_FLOAT, CL_RGBA},
+    {CM_SURF_FMT_RGBX32F, CL_FLOAT, CL_RGBA},
+    {CM_SURF_FMT_BGRA4, 500, CL_BGRA},
+    {CM_SURF_FMT_BGR5_A1, CL_UNSIGNED_INT16, CL_BGRA},
+    {CM_SURF_FMT_BGRA8, CL_UNORM_INT8, CL_BGRA},
+    {CM_SURF_FMT_BGR10_A2, 500, CL_BGRA},
+    {CM_SURF_FMT_BGRA16, CL_UNORM_INT16, CL_BGRA},
+    {CM_SURF_FMT_BGRA16F, CL_UNORM_INT16, CL_BGRA},  // NOTE(review): BGRX16F is CL_HALF_FLOAT - confirm
+    {CM_SURF_FMT_BGRA32F, CL_FLOAT, CL_BGRA},
+    {CM_SURF_FMT_RGBA4, 500, CL_RGBA},
+    {CM_SURF_FMT_RGB5_A1, CL_UNSIGNED_INT16, CL_RGBA},
+    {CM_SURF_FMT_RGBA8, CL_UNORM_INT8, CL_RGBA},
+    {CM_SURF_FMT_RGB10_A2, CL_UNORM_INT_101010, CL_RGB},
+    {CM_SURF_FMT_RGBA16, CL_UNORM_INT16, CL_RGBA},
+    {CM_SURF_FMT_RGBA16F, CL_HALF_FLOAT, CL_RGBA},
+    {CM_SURF_FMT_RGBA32I, CL_UNSIGNED_INT32, CL_RGBA},
+    {CM_SURF_FMT_RGBA32F, CL_FLOAT, CL_RGBA},
+    {CM_SURF_FMT_DUDV8, CL_UNSIGNED_INT8, CL_RG},
+    {CM_SURF_FMT_DXT1, 500, CL_R},
+    {CM_SURF_FMT_DXT2_3, 500, CL_R},
+    {CM_SURF_FMT_DXT4_5, 500, CL_R},
+    {CM_SURF_FMT_ATI1N, 500, CL_R},
+    {CM_SURF_FMT_ATI2N, 500, CL_R},
+    {CM_SURF_FMT_DEPTH16, CL_UNORM_INT16, CL_DEPTH},
+    {CM_SURF_FMT_DEPTH16F, CL_HALF_FLOAT, CL_DEPTH},
+    {CM_SURF_FMT_DEPTH24_X8, 500, CL_DEPTH},
+    {CM_SURF_FMT_DEPTH24F_X8, 500, CL_DEPTH},
+    {CM_SURF_FMT_DEPTH24_STEN8, CL_UNORM_INT24, CL_DEPTH_STENCIL},
+    {CM_SURF_FMT_DEPTH24F_STEN8, 500, CL_DEPTH_STENCIL},
+    {CM_SURF_FMT_DEPTH32F_X24_STEN8, CL_FLOAT, CL_DEPTH_STENCIL},
+    {CM_SURF_FMT_DEPTH32F, CL_FLOAT, CL_DEPTH},
+    {CM_SURF_FMT_sR11_sG11_sB10, 500, CL_R},
+    {CM_SURF_FMT_sU16, CL_SNORM_INT16, CL_R},
+    {CM_SURF_FMT_sUV16, CL_SNORM_INT16, CL_RG},
+    {CM_SURF_FMT_sUVWQ16, CL_SNORM_INT16, CL_RGBA},
+    {CM_SURF_FMT_RG16, CL_UNORM_INT16, CL_RG},
+    {CM_SURF_FMT_RG16F, CL_HALF_FLOAT, CL_RG},
+    {CM_SURF_FMT_RG32F, CL_FLOAT, CL_RG},
+    {CM_SURF_FMT_ABGR4, 500, CL_ARGB},
+    {CM_SURF_FMT_A1_BGR5, CL_UNSIGNED_INT16, CL_ARGB},
+    {CM_SURF_FMT_ABGR8, CL_UNORM_INT8, CL_ARGB},
+    {CM_SURF_FMT_A2_BGR10, CL_UNORM_INT_101010, CL_RGB},
+    {CM_SURF_FMT_ABGR16, CL_UNORM_INT16, CL_ARGB},
+    {CM_SURF_FMT_ABGR16F, CL_HALF_FLOAT, CL_ARGB},
+    {CM_SURF_FMT_ABGR32F, CL_FLOAT, CL_ARGB},
+    {CM_SURF_FMT_DXT1A, 500, CL_R},
+    {CM_SURF_FMT_sRGB10_A2, 500, CL_RGBA},
+    {CM_SURF_FMT_sR8, CL_SNORM_INT8, CL_R},
+    {CM_SURF_FMT_sRG8, CL_SNORM_INT8, CL_RG},
+    {CM_SURF_FMT_sR32I, CL_SIGNED_INT32, CL_R},
+    {CM_SURF_FMT_sRG32I, CL_SIGNED_INT32, CL_RG},
+    {CM_SURF_FMT_sRGBA32I, CL_SIGNED_INT32, CL_RGBA},
+    {CM_SURF_FMT_R32I, CL_UNSIGNED_INT32, CL_R},
+    {CM_SURF_FMT_RG32I, CL_UNSIGNED_INT32, CL_RG},
+    {CM_SURF_FMT_RG8, CL_UNORM_INT8, CL_RG},
+    {CM_SURF_FMT_sRGBA8, CL_SNORM_INT8, CL_RGBA},
+    {CM_SURF_FMT_R11F_G11F_B10F, 500, CL_RGBA},
+    {CM_SURF_FMT_RGB9_E5, CL_UNORM_INT8, CL_ARGB},
+    {CM_SURF_FMT_LUMINANCE_LATC1, 500, CL_RGBA},
+    {CM_SURF_FMT_SIGNED_LUMINANCE_LATC1, 500, CL_RGBA},
+    {CM_SURF_FMT_LUMINANCE_ALPHA_LATC2, 500, CL_RGBA},
+    {CM_SURF_FMT_SIGNED_LUMINANCE_ALPHA_LATC2, 500, CL_RGBA},
+    {CM_SURF_FMT_RED_RGTC1, 500, CL_RGBA},
+    {CM_SURF_FMT_SIGNED_RED_RGTC1, 500, CL_RGBA},
+    {CM_SURF_FMT_RED_GREEN_RGTC2, 500, CL_RGBA},
+    {CM_SURF_FMT_SIGNED_RED_GREEN_RGTC2, 500, CL_RGBA},
+    {CM_SURF_FMT_R8, CL_UNORM_INT8, CL_R},
+    {CM_SURF_FMT_R16, CL_UNORM_INT16, CL_R},
+    {CM_SURF_FMT_R16F, CL_HALF_FLOAT, CL_R},
+    {CM_SURF_FMT_R32F, CL_FLOAT, CL_R},
+    {CM_SURF_FMT_R8I, CL_UNSIGNED_INT8, CL_R},
+    {CM_SURF_FMT_sR8I, CL_SIGNED_INT8, CL_R},
+    {CM_SURF_FMT_RG8I, CL_UNSIGNED_INT8, CL_RG},
+    {CM_SURF_FMT_sRG8I, CL_SIGNED_INT8, CL_RG},
+    {CM_SURF_FMT_R16I, CL_UNSIGNED_INT16, CL_R},
+    {CM_SURF_FMT_sR16I, CL_SIGNED_INT16, CL_R},
+    {CM_SURF_FMT_RG16I, CL_UNSIGNED_INT16, CL_RG},
+    {CM_SURF_FMT_sRG16I, CL_SIGNED_INT16, CL_RG},
+    {CM_SURF_FMT_RGBA32UI, CL_UNSIGNED_INT32, CL_RGBA},
+    {CM_SURF_FMT_RGBX32UI, CL_UNSIGNED_INT32, CL_RGBA},
+    {CM_SURF_FMT_ALPHA32UI, CL_UNSIGNED_INT32, CL_R},
+    {CM_SURF_FMT_INTENSITY32UI, CL_UNSIGNED_INT32, CL_R},
+    {CM_SURF_FMT_LUMINANCE32UI, CL_UNSIGNED_INT32, CL_R},
+    {CM_SURF_FMT_LUMINANCE_ALPHA32UI, CL_UNSIGNED_INT32, CL_RG},
+    {CM_SURF_FMT_RGBA16UI, CL_UNSIGNED_INT16, CL_RGBA},
+    {CM_SURF_FMT_RGBX16UI, CL_UNSIGNED_INT16, CL_RGBA},
+    {CM_SURF_FMT_ALPHA16UI, CL_UNSIGNED_INT16, CL_R},
+    {CM_SURF_FMT_INTENSITY16UI, CL_UNSIGNED_INT16, CL_R},
+    {CM_SURF_FMT_LUMINANCE16UI, CL_UNSIGNED_INT16, CL_R},
+    {CM_SURF_FMT_LUMINANCE_ALPHA16UI, CL_UNSIGNED_INT32, CL_RG},  // NOTE(review): RG16I is INT16 - confirm
+    {CM_SURF_FMT_RGBA8UI, CL_UNSIGNED_INT8, CL_RGBA},
+    {CM_SURF_FMT_RGBX8UI, CL_UNORM_INT8, CL_RGBA},  // NOTE(review): RGBA8UI is CL_UNSIGNED_INT8 - confirm
+    {CM_SURF_FMT_ALPHA8UI, CL_UNSIGNED_INT8, CL_R},
+    {CM_SURF_FMT_INTENSITY8UI, CL_UNSIGNED_INT8, CL_R},
+    {CM_SURF_FMT_LUMINANCE8UI, CL_UNSIGNED_INT8, CL_R},
+    {CM_SURF_FMT_LUMINANCE_ALPHA8UI, CL_UNSIGNED_INT8, CL_RG},
+    {CM_SURF_FMT_sRGBX32I, CL_SIGNED_INT32, CL_RGBA},
+    {CM_SURF_FMT_sALPHA32I, CL_SIGNED_INT32, CL_R},
+    {CM_SURF_FMT_sINTENSITY32I, CL_SIGNED_INT32, CL_R},
+    {CM_SURF_FMT_sLUMINANCE32I, CL_SIGNED_INT32, CL_R},
+    {CM_SURF_FMT_sLUMINANCE_ALPHA32I, CL_SIGNED_INT32, CL_RG},
+    {CM_SURF_FMT_sRGBA16I, CL_SIGNED_INT16, CL_RGBA},
+    {CM_SURF_FMT_sRGBX16I, CL_SIGNED_INT16, CL_RGBA},
+    {CM_SURF_FMT_sALPHA16I, CL_SIGNED_INT16, CL_R},
+    {CM_SURF_FMT_sINTENSITY16I, CL_SIGNED_INT16, CL_R},
+    {CM_SURF_FMT_sLUMINANCE16I, CL_SIGNED_INT16, CL_R},
+    {CM_SURF_FMT_sLUMINANCE_ALPHA16I, CL_SIGNED_INT16, CL_RG},
+    {CM_SURF_FMT_sRGBA8I, CL_SIGNED_INT8, CL_RGBA},
+    {CM_SURF_FMT_sRGBX8I, CL_SIGNED_INT8, CL_RGBA},
+    {CM_SURF_FMT_sALPHA8I, CL_SIGNED_INT8, CL_R},
+    {CM_SURF_FMT_sINTENSITY8I, CL_SIGNED_INT8, CL_R},
+    {CM_SURF_FMT_sLUMINANCE8I, CL_SIGNED_INT8, CL_R},
+    {CM_SURF_FMT_sLUMINANCE_ALPHA8I, CL_SIGNED_INT8, CL_RG},  // channel type, same as the sRG8I row
+    {CM_SURF_FMT_sDXT6, 500, CL_R},
+    {CM_SURF_FMT_DXT6, 500, CL_R},
+    {CM_SURF_FMT_DXT7, 500, CL_R},
+    {CM_SURF_FMT_LUMINANCE8_SNORM, CL_SNORM_INT8, CL_R},
+    {CM_SURF_FMT_LUMINANCE16_SNORM, CL_SNORM_INT16, CL_R},
+    {CM_SURF_FMT_INTENSITY8_SNORM, CL_SNORM_INT8, CL_R},
+    {CM_SURF_FMT_INTENSITY16_SNORM, CL_SNORM_INT16, CL_R},
+    {CM_SURF_FMT_ALPHA8_SNORM, CL_SNORM_INT8, CL_R},
+    {CM_SURF_FMT_ALPHA16_SNORM, CL_SNORM_INT16, CL_R},
+    {CM_SURF_FMT_LUMINANCE_ALPHA8_SNORM, CL_SNORM_INT8, CL_RG},
+    {CM_SURF_FMT_LUMINANCE_ALPHA16_SNORM, CL_SNORM_INT16, CL_RG},
+    {CM_SURF_FMT_R8_SNORM, CL_SNORM_INT8, CL_R},
+    {CM_SURF_FMT_R16_SNORM, CL_SNORM_INT16, CL_R},
+    {CM_SURF_FMT_RG8_SNORM, CL_SNORM_INT8, CL_RG},
+    {CM_SURF_FMT_RG16_SNORM, CL_SNORM_INT16, CL_RG},
+    {CM_SURF_FMT_RGBX8_SNORM, CL_SNORM_INT8, CL_RGBA},
+    {CM_SURF_FMT_RGBX16_SNORM, CL_SNORM_INT16, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_SNORM, CL_SNORM_INT8, CL_RGBA},
+    {CM_SURF_FMT_RGBA16_SNORM, CL_SNORM_INT16, CL_RGBA},
+    {CM_SURF_FMT_RGB10_A2UI, 500, CL_RGBA},
+    {CM_SURF_FMT_RGB32F, 500, CL_RGBA},
+    {CM_SURF_FMT_RGB32I, 500, CL_RGBA},
+    {CM_SURF_FMT_RGB32UI, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBX8_SRGB, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_SRGB, 500, CL_RGBA},
+    {CM_SURF_FMT_DXT1_SRGB, 500, CL_RGBA},
+    {CM_SURF_FMT_DXT1A_SRGB, 500, CL_RGBA},
+    {CM_SURF_FMT_DXT2_3_SRGB, 500, CL_RGBA},
+    {CM_SURF_FMT_DXT4_5_SRGB, 500, CL_RGBA},
+    {CM_SURF_FMT_DXT7_SRGB, 500, CL_RGBA},
+    {CM_SURF_FMT_RGB8_ETC2, 500, CL_RGB},
+    {CM_SURF_FMT_SRGB8_ETC2, 500, CL_RGB},
+    {CM_SURF_FMT_RGB8_PT_ALPHA1_ETC2, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGB8_PT_ALPHA1_ETC2, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_ETC2_EAC, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGB8_ALPHA8_ETC2_EAC, 500, CL_RGBA},
+    {CM_SURF_FMT_R11_EAC, 500, CL_R},
+    {CM_SURF_FMT_SIGNED_R11_EAC, 500, CL_R},
+    {CM_SURF_FMT_RG11_EAC, 500, CL_RG},
+    {CM_SURF_FMT_SIGNED_RG11_EAC, 500, CL_RG},
+    {CM_SURF_FMT_RGBA8_ASTC_4x4, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_ASTC_5x4, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_ASTC_5x5, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_ASTC_6x5, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_ASTC_6x6, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_ASTC_8x5, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_ASTC_8x6, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_ASTC_8x8, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_ASTC_10x5, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_ASTC_10x6, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_ASTC_10x8, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_ASTC_10x10, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_ASTC_12x10, 500, CL_RGBA},
+    {CM_SURF_FMT_RGBA8_ASTC_12x12, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGBA8_ASTC_4x4, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGBA8_ASTC_5x4, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGBA8_ASTC_5x5, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGBA8_ASTC_6x5, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGBA8_ASTC_6x6, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGBA8_ASTC_8x5, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGBA8_ASTC_8x6, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGBA8_ASTC_8x8, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGBA8_ASTC_10x5, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGBA8_ASTC_10x6, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGBA8_ASTC_10x8, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGBA8_ASTC_10x10, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGBA8_ASTC_12x10, 500, CL_RGBA},
+    {CM_SURF_FMT_SRGBA8_ASTC_12x12, 500, CL_RGBA},
+    {CM_SURF_FMT_BGR10_A2UI, 500, CL_BGRA},
+    {CM_SURF_FMT_A2_BGR10UI, 500, CL_ARGB},
+    {CM_SURF_FMT_A2_RGB10UI, 500, CL_ABGR},
+    {CM_SURF_FMT_B5_G6_R5UI, 500, CL_BGRA},
+    {CM_SURF_FMT_R5_G6_B5UI, 500, CL_RGBA},
+    {CM_SURF_FMT_DEPTH32F_X24_STEN8_UNCLAMPED, CL_UNSIGNED_INT32, CL_R},
+    {CM_SURF_FMT_DEPTH32F_UNCLAMPED, CL_FLOAT, CL_R},
+    {CM_SURF_FMT_L8_X16_A8_SRGB, 500, CL_RGBA},
+    {CM_SURF_FMT_L8_X24_SRGB, 500, CL_RGBA},
+    {CM_SURF_FMT_STENCIL8, CL_UNSIGNED_INT8, CL_R},
 };
 
 bool Device::initGLInteropPrivateExt(void* GLplatformContext, void* GLdeviceContext) const {
@@ -557,8 +656,8 @@ bool Device::initGLInteropPrivateExt(void* GLplatformContext, void* GLdeviceCont
     return false;
   }
 
-  pfnMesaGLInteropGLXQueryDeviceInfo = (PFNMesaGLInteropGLXQueryDeviceInfo)dlsym(
-    pModule, "MesaGLInteropGLXQueryDeviceInfo");
+  pfnMesaGLInteropGLXQueryDeviceInfo =
+      (PFNMesaGLInteropGLXQueryDeviceInfo)dlsym(pModule, "MesaGLInteropGLXQueryDeviceInfo");
   if (nullptr == pfnMesaGLInteropGLXQueryDeviceInfo) {
     return false;
   }
@@ -634,17 +733,17 @@ bool Device::glCanInterop(void* GLplatformContext, void* GLdeviceContext) const
         ((1 << properties().gpuIndex) == glChainBitMask);
   }
 #else
-  GLuint glDeviceId = 0 ;
-  GLuint glChainMask = 0 ;
+  GLuint glDeviceId = 0;
+  GLuint glChainMask = 0;
   GLXContext ctx = static_cast<GLXContext>(GLplatformContext);
   Display* disp = static_cast<Display*>(GLdeviceContext);
 
 
   if (glXGetContextMVPUInfoAMD(ctx, &glDeviceId, &glChainMask)) {
-      mesa_glinterop_device_info info = {};
+    mesa_glinterop_device_info info = {};
     if (pfnMesaGLInteropGLXQueryDeviceInfo(disp, ctx, &info) == 0) {
-        // match the adapter
-        canInteroperate = (properties().pciProperties.busNumber == info.pci_bus) &&
+      // match the adapter
+      canInteroperate = (properties().pciProperties.busNumber == info.pci_bus) &&
           (properties().pciProperties.deviceNumber == info.pci_device) &&
           (properties().pciProperties.functionNumber == info.pci_function) &&
           (static_cast<GLuint>(1 << properties().gpuIndex) == glChainMask);
@@ -749,7 +848,7 @@ bool Device::resGLAssociate(void* GLContext, uint name, uint type, Pal::OsExtern
     return status;
   }
   assert(static_cast<cmSurfFmt>(hData.format) == cmFormatXlateTable[index].raw_cmFormat);
-  cl_channel_type    imageDataType;
+  cl_channel_type imageDataType;
   imageDataType = cmFormatXlateTable[index].image_channel_data_type;
   if (imageDataType == 500) {
     LogError("\nGL surface is not supported by OCL\n");
@@ -819,4 +918,4 @@ bool Device::resGLFree(void* GLplatformContext, void* mbResHandle, uint type) co
 #endif
 }
 
-}  // pal
+}  // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/palgpuopen.cpp b/projects/clr/rocclr/runtime/device/pal/palgpuopen.cpp
index 277d8dec86..ac6ee980be 100644
--- a/projects/clr/rocclr/runtime/device/pal/palgpuopen.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palgpuopen.cpp
@@ -32,34 +32,27 @@
 #include "protocols/rgpServer.h"
 #include "protocols/driverControlServer.h"
 
-namespace pal
-{
+namespace pal {
 // ================================================================================================
 RgpCaptureMgr::RgpCaptureMgr(Pal::IPlatform* platform, const Device& device)
-  :
-  device_(device),
-  dev_driver_server_(platform->GetDevDriverServer()),
-  user_event_(nullptr),
-  num_prep_disp_(0),
-  max_sqtt_disp_(device_.settings().rgpSqttDispCount_),
-  trace_gpu_mem_limit_(0),
-  global_disp_count_(1),      // Must start from 1 according to RGP spec
-  trace_enabled_(false),
-  inst_tracing_enabled_(false)
-{
+    : device_(device),
+      dev_driver_server_(platform->GetDevDriverServer()),
+      user_event_(nullptr),
+      num_prep_disp_(0),
+      max_sqtt_disp_(device_.settings().rgpSqttDispCount_),
+      trace_gpu_mem_limit_(0),
+      global_disp_count_(1),  // Must start from 1 according to RGP spec
+      trace_enabled_(false),
+      inst_tracing_enabled_(false) {
   memset(&trace_, 0, sizeof(trace_));
 }
 
 // ================================================================================================
-RgpCaptureMgr::~RgpCaptureMgr()
-{
-  DestroyRGPTracing();
-}
+RgpCaptureMgr::~RgpCaptureMgr() { DestroyRGPTracing(); }
 
 // ================================================================================================
 // Creates the GPU Open Developer Mode manager class.
-RgpCaptureMgr* RgpCaptureMgr::Create(Pal::IPlatform* platform, const Device& device)
-{
+RgpCaptureMgr* RgpCaptureMgr::Create(Pal::IPlatform* platform, const Device& device) {
   RgpCaptureMgr* mgr = new RgpCaptureMgr(platform, device);
 
   if (mgr != nullptr && !mgr->Init(platform)) {
@@ -71,8 +64,7 @@ RgpCaptureMgr* RgpCaptureMgr::Create(Pal::IPlatform* platform, const Device& dev
 }
 
 // ================================================================================================
-bool RgpCaptureMgr::Init(Pal::IPlatform* platform)
-{
+bool RgpCaptureMgr::Init(Pal::IPlatform* platform) {
   if (dev_driver_server_ == nullptr) {
     return false;
   }
@@ -105,13 +97,11 @@ bool RgpCaptureMgr::Init(Pal::IPlatform* platform)
 
     const uint32_t api_version = settings.oclVersion_;
 
-    trace_.gpa_session_ = new GpuUtil::GpaSession(
-        platform,
-        device_.iDev(),
-        api_version >> 4,   // OCL API version major
-        api_version & 0xf,  // OCL API version minor
-        RgpSqttInstrumentationSpecVersion,
-        RgpSqttInstrumentationApiVersion);
+    trace_.gpa_session_ = new GpuUtil::GpaSession(platform, device_.iDev(),
+                                                  api_version >> 4,   // OCL API version major
+                                                  api_version & 0xf,  // OCL API version minor
+                                                  RgpSqttInstrumentationSpecVersion,
+                                                  RgpSqttInstrumentationApiVersion);
 
     if (trace_.gpa_session_ == nullptr) {
       result = false;
@@ -119,7 +109,7 @@ bool RgpCaptureMgr::Init(Pal::IPlatform* platform)
   }
 
   // Initialize the GPA session
-  if (result &&  (trace_.gpa_session_->Init() != Pal::Result::Success)) {
+  if (result && (trace_.gpa_session_->Init() != Pal::Result::Success)) {
     result = false;
   }
 
@@ -133,9 +123,9 @@ bool RgpCaptureMgr::Init(Pal::IPlatform* platform)
   if (!result) {
     // If we've failed to initialize tracing, permanently disable traces
     if (rgp_server_ != nullptr) {
-        rgp_server_->DisableTraces();
+      rgp_server_->DisableTraces();
 
-        trace_enabled_ = false;
+      trace_enabled_ = false;
     }
 
     // Clean up if we failed
@@ -150,9 +140,8 @@ bool RgpCaptureMgr::Init(Pal::IPlatform* platform)
 // ================================================================================================
 // This function finds out all the queues in the device that we have to synchronize for RGP-traced
 // frames and initializes resources for them.
-bool RgpCaptureMgr::RegisterTimedQueue(
-  uint32_t queue_id, Pal::IQueue* iQueue, bool* debug_vmid) const
-{
+bool RgpCaptureMgr::RegisterTimedQueue(uint32_t queue_id, Pal::IQueue* iQueue,
+                                       bool* debug_vmid) const {
   bool result = true;
 
   // Get the OS context handle for this queue (this is a thing that RGP needs on DX clients;
@@ -166,8 +155,8 @@ bool RgpCaptureMgr::RegisterTimedQueue(
   *debug_vmid = kernelContextInfo.flags.hasDebugVmid;
 
   // Register the queue with the GPA session class for timed queue operation support.
-  if (trace_.gpa_session_->RegisterTimedQueue(iQueue, queue_id,
-      kernelContextInfo.contextIdentifier) != Pal::Result::Success) {
+  if (trace_.gpa_session_->RegisterTimedQueue(
+          iQueue, queue_id, kernelContextInfo.contextIdentifier) != Pal::Result::Success) {
     result = false;
   }
 
@@ -175,11 +164,8 @@ bool RgpCaptureMgr::RegisterTimedQueue(
 }
 
 // ================================================================================================
-Pal::Result RgpCaptureMgr::TimedQueueSubmit(
-  Pal::IQueue*  queue,
-  uint64_t      cmdId,
-  const Pal::SubmitInfo& submitInfo) const
-{
+Pal::Result RgpCaptureMgr::TimedQueueSubmit(Pal::IQueue* queue, uint64_t cmdId,
+                                            const Pal::SubmitInfo& submitInfo) const {
   // Fill in extra meta-data information to associate the API command buffer data with
   // the generated timing information.
   GpuUtil::TimedSubmitInfo timedSubmitInfo = {};
@@ -205,8 +191,7 @@ Pal::Result RgpCaptureMgr::TimedQueueSubmit(
 // Called during initial device enumeration prior to calling Pal::IDevice::CommitSettingsAndInit().
 //
 // This finalizes the developer driver manager.
-void RgpCaptureMgr::Finalize()
-{
+void RgpCaptureMgr::Finalize() {
   // Figure out if the gfxip supports tracing.  We decide tracing if there is at least one
   // enumerated GPU that can support tracing.  Since we don't yet know if that GPU will be
   // picked as the target of an eventual VkDevice, this check is imperfect.
@@ -215,8 +200,8 @@ void RgpCaptureMgr::Finalize()
   bool hw_support_tracing = false;
 
   if ((rgp_server_->EnableTraces() == DevDriver::Result::Success)) {
-   if (GpuSupportsTracing(device_.properties(), device_.settings())) {
-     hw_support_tracing = true;
+    if (GpuSupportsTracing(device_.properties(), device_.settings())) {
+      hw_support_tracing = true;
     }
   }
 
@@ -234,20 +219,18 @@ void RgpCaptureMgr::Finalize()
 
 // ================================================================================================
 // Waits for the driver to be resumed if it's currently paused.
-void RgpCaptureMgr::WaitForDriverResume()
-{
-    auto* pDriverControlServer = dev_driver_server_->GetDriverControlServer();
+void RgpCaptureMgr::WaitForDriverResume() {
+  auto* pDriverControlServer = dev_driver_server_->GetDriverControlServer();
 
-    assert(pDriverControlServer != nullptr);
+  assert(pDriverControlServer != nullptr);
 
-    pDriverControlServer->WaitForDriverResume();
+  pDriverControlServer->WaitForDriverResume();
 }
 
 // ================================================================================================
 // Called before a swap chain presents.  This signals a frame-end boundary and
 // is used to coordinate RGP trace start/stop.
-void RgpCaptureMgr::PostDispatch(VirtualGPU* gpu)
-{
+void RgpCaptureMgr::PostDispatch(VirtualGPU* gpu) {
   if (rgp_server_->TracesEnabled()) {
     // If there's currently a trace running, submit the trace-end command buffer
     if (trace_.status_ == TraceStatus::Running) {
@@ -257,8 +240,7 @@ void RgpCaptureMgr::PostDispatch(VirtualGPU* gpu)
         Pal::Result res = EndRGPHardwareTrace(gpu);
         if (Pal::Result::ErrorIncompatibleQueue == res) {
           // continue until we find the right queue...
-        }
-        else if (Pal::Result::Success == res) {
+        } else if (Pal::Result::Success == res) {
           trace_.sqtt_disp_count_ = 0;
         } else {
           FinishRGPTrace(gpu, true);
@@ -272,43 +254,42 @@ void RgpCaptureMgr::PostDispatch(VirtualGPU* gpu)
 
       // Currently nothing in the PresentInfo struct is used for inserting a timed present marker.
       GpuUtil::TimedQueuePresentInfo timedPresentInfo = {};
-      //Pal::Result result = trace_.gpa_session_->TimedQueuePresent(pPalQueue, timedPresentInfo);
-      //assert(result == Pal::Result::Success);
+      // Pal::Result result = trace_.gpa_session_->TimedQueuePresent(pPalQueue, timedPresentInfo);
+      // assert(result == Pal::Result::Success);
     }
   }
 }
 
 // ================================================================================================
-Pal::Result RgpCaptureMgr::CheckForTraceResults()
-{
+Pal::Result RgpCaptureMgr::CheckForTraceResults() {
   assert(trace_.status_ == TraceStatus::WaitingForResults);
 
   Pal::Result result = Pal::Result::NotReady;
 
   // Check if trace results are ready
-  if (trace_.gpa_session_->IsReady() && // GPA session is ready
-      (trace_.begin_queue_->isDone(&trace_.end_event_)))   // "Trace end" cmdbuf has retired
+  if (trace_.gpa_session_->IsReady() &&                   // GPA session is ready
+      (trace_.begin_queue_->isDone(&trace_.end_event_)))  // "Trace end" cmdbuf has retired
   {
     bool success = false;
 
     // Fetch required trace data size from GPA session
     size_t traceDataSize = 0;
-    void* pTraceData     = nullptr;
+    void* pTraceData = nullptr;
 
     trace_.gpa_session_->GetResults(trace_.gpa_sample_id_, &traceDataSize, nullptr);
 
     // Allocate memory for trace data
     if (traceDataSize > 0) {
-        pTraceData = amd::AlignedMemory::allocate(traceDataSize, 256);
+      pTraceData = amd::AlignedMemory::allocate(traceDataSize, 256);
     }
 
     if (pTraceData != nullptr) {
       // Get trace data from GPA session
       if (trace_.gpa_session_->GetResults(trace_.gpa_sample_id_, &traceDataSize, pTraceData) ==
-        Pal::Result::Success) {
+          Pal::Result::Success) {
         // Transmit trace data to anyone who's listening
-        auto devResult = rgp_server_->WriteTraceData(
-            static_cast<Pal::uint8*>(pTraceData), traceDataSize);
+        auto devResult =
+            rgp_server_->WriteTraceData(static_cast<Pal::uint8*>(pTraceData), traceDataSize);
 
         success = (devResult == DevDriver::Result::Success);
       }
@@ -317,7 +298,7 @@ Pal::Result RgpCaptureMgr::CheckForTraceResults()
     }
 
     if (success) {
-        result = Pal::Result::Success;
+      result = Pal::Result::Success;
     }
   }
 
@@ -327,9 +308,8 @@ Pal::Result RgpCaptureMgr::CheckForTraceResults()
 // ================================================================================================
 // Called after a swap chain presents.  This signals a (next) frame-begin boundary and is
 // used to coordinate RGP trace start/stop.
-void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel,
-  size_t x, size_t y, size_t z)
-{
+void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel, size_t x, size_t y,
+                                size_t z) {
   // Wait for the driver to be resumed in case it's been paused.
   WaitForDriverResume();
 
@@ -347,8 +327,7 @@ void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel,
           }
         }
       }
-    }
-    else if (trace_.status_ == TraceStatus::Preparing) {
+    } else if (trace_.status_ == TraceStatus::Preparing) {
       // Wait some number of "preparation frames" before starting the trace in order to get enough
       // timer samples to sync CPU/GPU clock domains.
       trace_.prepared_disp_count_++;
@@ -370,7 +349,7 @@ void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel,
     // Check if we're ending a trace waiting for SQTT to turn off.
     // If SQTT has turned off, end the trace
     else if (trace_.status_ == TraceStatus::WaitingForSqtt) {
-      Pal::Result result      = Pal::Result::Success;
+      Pal::Result result = Pal::Result::Success;
 
       if (trace_.begin_queue_->isDone(&trace_.end_sqtt_event_)) {
         result = EndRGPTrace(gpu);
@@ -401,14 +380,17 @@ void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel,
       RgpSqttMarkerEventType apiEvent = RgpSqttMarkerEventType::CmdNDRangeKernel;
       if (kernel.prog().isInternal()) {
         constexpr RgpSqttMarkerEventType ApiEvents[KernelBlitManager::BlitTotal] = {
-          RgpSqttMarkerEventType::CmdCopyImage, RgpSqttMarkerEventType::CmdCopyImage,
-          RgpSqttMarkerEventType::CmdCopyImageToBuffer,
-          RgpSqttMarkerEventType::CmdCopyBufferToImage,
-          RgpSqttMarkerEventType::CmdCopyBuffer, RgpSqttMarkerEventType::CmdCopyBuffer,
-          RgpSqttMarkerEventType::CmdCopyBuffer, RgpSqttMarkerEventType::CmdCopyBuffer,
-          RgpSqttMarkerEventType::CmdFillBuffer, RgpSqttMarkerEventType::CmdFillImage,
-          RgpSqttMarkerEventType::CmdScheduler
-        };
+            RgpSqttMarkerEventType::CmdCopyImage,
+            RgpSqttMarkerEventType::CmdCopyImage,
+            RgpSqttMarkerEventType::CmdCopyImageToBuffer,
+            RgpSqttMarkerEventType::CmdCopyBufferToImage,
+            RgpSqttMarkerEventType::CmdCopyBuffer,
+            RgpSqttMarkerEventType::CmdCopyBuffer,
+            RgpSqttMarkerEventType::CmdCopyBuffer,
+            RgpSqttMarkerEventType::CmdCopyBuffer,
+            RgpSqttMarkerEventType::CmdFillBuffer,
+            RgpSqttMarkerEventType::CmdFillImage,
+            RgpSqttMarkerEventType::CmdScheduler};
         for (uint i = 0; i < KernelBlitManager::BlitTotal; ++i) {
           if (kernel.name().compare(BlitName[i]) == 0) {
             apiEvent = ApiEvents[i];
@@ -418,8 +400,8 @@ void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel,
       }
       WriteUserEventMarker(gpu, RgpSqttMarkerUserEventObjectName, kernel.name());
       // Write disaptch marker
-      WriteEventWithDimsMarker(gpu, apiEvent,
-        static_cast<uint32_t>(x), static_cast<uint32_t>(y), static_cast<uint32_t>(z));
+      WriteEventWithDimsMarker(gpu, apiEvent, static_cast<uint32_t>(x), static_cast<uint32_t>(y),
+                               static_cast<uint32_t>(z));
     }
   }
 
@@ -428,11 +410,11 @@ void RgpCaptureMgr::PreDispatch(VirtualGPU* gpu, const HSAILKernel& kernel,
 
 // ================================================================================================
 // This function starts preparing for an RGP trace.  Preparation involves some N frames of
-// lead-up time during which timing samples are accumulated to synchronize CPU and GPU clock domains.
+// lead-up time during which timing samples are accumulated to synchronize CPU and GPU clock
+// domains.
 //
 // This function transitions from the Idle state to the Preparing state.
-Pal::Result RgpCaptureMgr::PrepareRGPTrace(VirtualGPU* gpu)
-{
+Pal::Result RgpCaptureMgr::PrepareRGPTrace(VirtualGPU* gpu) {
   assert(trace_.status_ == TraceStatus::Idle);
 
   // We can only trace using a single device at a time currently, so recreate RGP trace
@@ -441,32 +423,32 @@ Pal::Result RgpCaptureMgr::PrepareRGPTrace(VirtualGPU* gpu)
 
   const auto traceParameters = rgp_server_->QueryTraceParameters();
 
-  num_prep_disp_   = traceParameters.captureStartIndex;
+  num_prep_disp_ = traceParameters.captureStartIndex;
   uint32_t capture_disp = traceParameters.captureStopIndex - traceParameters.captureStartIndex;
   // Validate if the captured dispatches are in the range
   if ((capture_disp > 0) && (capture_disp < max_sqtt_disp_)) {
     max_sqtt_disp_ = capture_disp;
   }
 
-  trace_gpu_mem_limit_  = traceParameters.gpuMemoryLimitInMb * 1024 * 1024;
+  trace_gpu_mem_limit_ = traceParameters.gpuMemoryLimitInMb * 1024 * 1024;
   inst_tracing_enabled_ = traceParameters.flags.enableInstructionTokens;
 
   // Notify the RGP server that we are starting a trace
   if (rgp_server_->BeginTrace() != DevDriver::Result::Success) {
-      result = Pal::Result::ErrorUnknown;
+    result = Pal::Result::ErrorUnknown;
   }
 
   // Tell the GPA session class we're starting a trace
   if (result == Pal::Result::Success) {
     GpuUtil::GpaSessionBeginInfo info = {};
 
-    info.flags.enableQueueTiming   = true;// trace_.queueTimingEnabled;
+    info.flags.enableQueueTiming = true;  // trace_.queueTimingEnabled;
 
     result = trace_.gpa_session_->Begin(info);
   }
 
   trace_.prepared_disp_count_ = 0;
-  trace_.sqtt_disp_count_     = 0;
+  trace_.sqtt_disp_count_ = 0;
 
   // Sample the timing clocks prior to starting a trace.
   if (result == Pal::Result::Success) {
@@ -476,7 +458,7 @@ Pal::Result RgpCaptureMgr::PrepareRGPTrace(VirtualGPU* gpu)
   if (result == Pal::Result::Success) {
     // Remember which queue started the trace
     trace_.prepare_queue_ = gpu;
-    trace_.begin_queue_   = nullptr;
+    trace_.begin_queue_ = nullptr;
 
     trace_.status_ = TraceStatus::Preparing;
   } else {
@@ -497,8 +479,7 @@ Pal::Result RgpCaptureMgr::PrepareRGPTrace(VirtualGPU* gpu)
 // the "begin trace" information command buffer.
 //
 // This function transitions from the Preparing state to the Running state.
-Pal::Result RgpCaptureMgr::BeginRGPTrace(VirtualGPU* gpu)
-{
+Pal::Result RgpCaptureMgr::BeginRGPTrace(VirtualGPU* gpu) {
   assert(trace_.status_ == TraceStatus::Preparing);
   assert(trace_enabled_);
 
@@ -526,8 +507,8 @@ Pal::Result RgpCaptureMgr::BeginRGPTrace(VirtualGPU* gpu)
 
     // Fill GPU commands
     gpu->eventBegin(MainEngine);
-    trace_.gpa_sample_id_ = trace_.gpa_session_->BeginSample(
-        gpu->queue(MainEngine).iCmd(), sampleConfig);
+    trace_.gpa_sample_id_ =
+        trace_.gpa_session_->BeginSample(gpu->queue(MainEngine).iCmd(), sampleConfig);
     gpu->eventEnd(MainEngine, trace_.begin_sqtt_event_);
   }
 
@@ -540,7 +521,7 @@ Pal::Result RgpCaptureMgr::BeginRGPTrace(VirtualGPU* gpu)
 
   // Make the trace active and remember which queue started it
   if (result == Pal::Result::Success) {
-    trace_.status_      = TraceStatus::Running;
+    trace_.status_ = TraceStatus::Running;
     trace_.begin_queue_ = gpu;
   }
 
@@ -551,8 +532,7 @@ Pal::Result RgpCaptureMgr::BeginRGPTrace(VirtualGPU* gpu)
 // This function submits the command buffer to stop SQTT tracing.  Full tracing still continues.
 //
 // This function transitions from the Running state to the WaitingForSqtt state.
-Pal::Result RgpCaptureMgr::EndRGPHardwareTrace(VirtualGPU* gpu)
-{
+Pal::Result RgpCaptureMgr::EndRGPHardwareTrace(VirtualGPU* gpu) {
   assert(trace_.status_ == TraceStatus::Running);
 
   Pal::Result result = Pal::Result::Success;
@@ -593,8 +573,7 @@ Pal::Result RgpCaptureMgr::EndRGPHardwareTrace(VirtualGPU* gpu)
 // This function ends a running RGP trace.
 //
 // This function transitions from the WaitingForSqtt state to WaitingForResults state.
-Pal::Result RgpCaptureMgr::EndRGPTrace(VirtualGPU* gpu)
-{
+Pal::Result RgpCaptureMgr::EndRGPTrace(VirtualGPU* gpu) {
   assert(trace_.status_ == TraceStatus::WaitingForSqtt);
 
   Pal::Result result = Pal::Result::Success;
@@ -629,8 +608,7 @@ Pal::Result RgpCaptureMgr::EndRGPTrace(VirtualGPU* gpu)
 // ================================================================================================
 // This function resets and possibly cancels a currently active (between begin/end) RGP trace.
 // It frees any dependent resources.
-void RgpCaptureMgr::FinishRGPTrace(VirtualGPU* gpu, bool aborted)
-{
+void RgpCaptureMgr::FinishRGPTrace(VirtualGPU* gpu, bool aborted) {
   if (trace_.prepare_queue_ == nullptr) {
     return;
   }
@@ -654,26 +632,25 @@ void RgpCaptureMgr::FinishRGPTrace(VirtualGPU* gpu, bool aborted)
 
   // Reset tracing state to idle
   trace_.prepared_disp_count_ = 0;
-  trace_.sqtt_disp_count_     = 0;
-  trace_.gpa_sample_id_       = 0;
-  trace_.status_              = TraceStatus::Idle;
-  trace_.prepare_queue_       = nullptr;
-  trace_.begin_queue_         = nullptr;
+  trace_.sqtt_disp_count_ = 0;
+  trace_.gpa_sample_id_ = 0;
+  trace_.status_ = TraceStatus::Idle;
+  trace_.prepare_queue_ = nullptr;
+  trace_.begin_queue_ = nullptr;
 }
 
 // ================================================================================================
 // Destroys device-persistent RGP resources
-void RgpCaptureMgr::DestroyRGPTracing()
-{
+void RgpCaptureMgr::DestroyRGPTracing() {
   if (trace_.status_ != TraceStatus::Idle) {
-   FinishRGPTrace(nullptr, true);
+    FinishRGPTrace(nullptr, true);
   }
 
   delete user_event_;
 
   // Destroy the GPA session
   if (trace_.gpa_session_ != nullptr) {
-    //Util::Destructor(trace_.gpa_session_);
+    // Util::Destructor(trace_.gpa_session_);
     delete trace_.gpa_session_;
     trace_.gpa_session_ = nullptr;
   }
@@ -683,18 +660,15 @@ void RgpCaptureMgr::DestroyRGPTracing()
 
 // ================================================================================================
 // Returns true if the given device properties/settings support tracing.
-bool RgpCaptureMgr::GpuSupportsTracing(
-    const Pal::DeviceProperties& props,
-    const Settings&       settings)
-{
+bool RgpCaptureMgr::GpuSupportsTracing(const Pal::DeviceProperties& props,
+                                       const Settings& settings) {
   return props.gfxipProperties.flags.supportRgpTraces && !settings.rgpSqttForceDisable_;
 }
 
 // ================================================================================================
 // Called when a new device is created.  This will preallocate reusable RGP trace resources
 // for that device.
-void RgpCaptureMgr::PostDeviceCreate()
-{
+void RgpCaptureMgr::PostDeviceCreate() {
   amd::ScopedLock traceLock(&trace_mutex_);
 
   auto* pDriverControlServer = dev_driver_server_->GetDriverControlServer();
@@ -714,8 +688,7 @@ void RgpCaptureMgr::PostDeviceCreate()
 // ================================================================================================
 // Called prior to a device's being destroyed.  This will free persistent RGP trace resources for
 // that device.
-void RgpCaptureMgr::PreDeviceDestroy()
-{
+void RgpCaptureMgr::PreDeviceDestroy() {
   amd::ScopedLock traceLock(&trace_mutex_);
   // If we are idle, we can re-initialize trace resources based on the new device.
   if (trace_.status_ == TraceStatus::Idle) {
@@ -725,9 +698,8 @@ void RgpCaptureMgr::PreDeviceDestroy()
 
 // ================================================================================================
 // Sets up an Event marker's basic data.
-RgpSqttMarkerEvent RgpCaptureMgr::BuildEventMarker(
-  const VirtualGPU* gpu, RgpSqttMarkerEventType api_type) const
-{
+RgpSqttMarkerEvent RgpCaptureMgr::BuildEventMarker(const VirtualGPU* gpu,
+                                                   RgpSqttMarkerEventType api_type) const {
   RgpSqttMarkerEvent marker = {};
 
   marker.identifier = RgpSqttMarkerIdentifierEvent;
@@ -739,24 +711,19 @@ RgpSqttMarkerEvent RgpCaptureMgr::BuildEventMarker(
 }
 
 // ================================================================================================
-void RgpCaptureMgr::WriteMarker(const VirtualGPU* gpu, const void* data, size_t data_size) const
-{
+void RgpCaptureMgr::WriteMarker(const VirtualGPU* gpu, const void* data, size_t data_size) const {
   assert((data_size % sizeof(uint32_t)) == 0);
   assert((data_size / sizeof(uint32_t)) > 0);
 
-  gpu->queue(MainEngine).iCmd()->CmdInsertRgpTraceMarker(
-    static_cast<uint32_t>(data_size / sizeof(uint32_t)), data);
+  gpu->queue(MainEngine)
+      .iCmd()
+      ->CmdInsertRgpTraceMarker(static_cast<uint32_t>(data_size / sizeof(uint32_t)), data);
 }
 
 // ================================================================================================
 // Inserts an RGP pre-dispatch marker
-void RgpCaptureMgr::WriteEventWithDimsMarker(
-  const VirtualGPU*      gpu,
-  RgpSqttMarkerEventType apiType,
-  uint32_t               x,
-  uint32_t               y,
-  uint32_t               z) const
-{
+void RgpCaptureMgr::WriteEventWithDimsMarker(const VirtualGPU* gpu, RgpSqttMarkerEventType apiType,
+                                             uint32_t x, uint32_t y, uint32_t z) const {
   assert(apiType != RgpSqttMarkerEventType::Invalid);
 
   RgpSqttMarkerEventWithDims eventWithDims = {};
@@ -771,26 +738,24 @@ void RgpCaptureMgr::WriteEventWithDimsMarker(
 }
 
 // ================================================================================================
-void RgpCaptureMgr::WriteBarrierStartMarker(
-  const VirtualGPU* gpu, const Pal::Developer::BarrierData& data) const
-{
+void RgpCaptureMgr::WriteBarrierStartMarker(const VirtualGPU* gpu,
+                                            const Pal::Developer::BarrierData& data) const {
   if (rgp_server_->TracesEnabled() && (trace_.status_ == TraceStatus::Running)) {
     amd::ScopedLock traceLock(&trace_mutex_);
     RgpSqttMarkerBarrierStart marker = {};
 
     marker.identifier = RgpSqttMarkerIdentifierBarrierStart;
-    marker.cbId       = trace_.begin_queue_->queue(MainEngine).cmdBufId();
-    marker.dword02    = data.reason;
-    marker.internal   = true;
+    marker.cbId = trace_.begin_queue_->queue(MainEngine).cmdBufId();
+    marker.dword02 = data.reason;
+    marker.internal = true;
 
     WriteMarker(gpu, &marker, sizeof(marker));
   }
 }
 
 // ================================================================================================
-void RgpCaptureMgr::WriteBarrierEndMarker(
-  const VirtualGPU* gpu, const Pal::Developer::BarrierData& data) const
-{
+void RgpCaptureMgr::WriteBarrierEndMarker(const VirtualGPU* gpu,
+                                          const Pal::Developer::BarrierData& data) const {
   if (rgp_server_->TracesEnabled() && (trace_.status_ == TraceStatus::Running)) {
     amd::ScopedLock traceLock(&trace_mutex_);
     // Copy the operations part and include the same data from previous markers
@@ -799,28 +764,28 @@ void RgpCaptureMgr::WriteBarrierEndMarker(
     auto operations = data.operations;
 
     operations.pipelineStalls.u16All |= 0;
-    operations.caches.u16All         |= 0;
+    operations.caches.u16All |= 0;
 
     RgpSqttMarkerBarrierEnd marker = {};
 
-    marker.identifier           = RgpSqttMarkerIdentifierBarrierEnd;
-    marker.cbId                 = trace_.begin_queue_->queue(MainEngine).cmdBufId();
+    marker.identifier = RgpSqttMarkerIdentifierBarrierEnd;
+    marker.cbId = trace_.begin_queue_->queue(MainEngine).cmdBufId();
 
-    marker.waitOnEopTs          = operations.pipelineStalls.waitOnEopTsBottomOfPipe;
-    marker.vsPartialFlush       = operations.pipelineStalls.vsPartialFlush;
-    marker.psPartialFlush       = operations.pipelineStalls.psPartialFlush;
-    marker.csPartialFlush       = operations.pipelineStalls.csPartialFlush;
-    marker.pfpSyncMe            = operations.pipelineStalls.pfpSyncMe;
-    marker.syncCpDma            = operations.pipelineStalls.syncCpDma;
-    marker.invalTcp             = operations.caches.invalTcp;
-    marker.invalSqI             = operations.caches.invalSqI$;
-    marker.invalSqK             = operations.caches.invalSqK$;
-    marker.flushTcc             = operations.caches.flushTcc;
-    marker.invalTcc             = operations.caches.invalTcc;
-    marker.flushCb              = operations.caches.flushCb;
-    marker.invalCb              = operations.caches.invalCb;
-    marker.flushDb              = operations.caches.flushDb;
-    marker.invalDb              = operations.caches.invalDb;
+    marker.waitOnEopTs = operations.pipelineStalls.waitOnEopTsBottomOfPipe;
+    marker.vsPartialFlush = operations.pipelineStalls.vsPartialFlush;
+    marker.psPartialFlush = operations.pipelineStalls.psPartialFlush;
+    marker.csPartialFlush = operations.pipelineStalls.csPartialFlush;
+    marker.pfpSyncMe = operations.pipelineStalls.pfpSyncMe;
+    marker.syncCpDma = operations.pipelineStalls.syncCpDma;
+    marker.invalTcp = operations.caches.invalTcp;
+    marker.invalSqI = operations.caches.invalSqI$;
+    marker.invalSqK = operations.caches.invalSqK$;
+    marker.flushTcc = operations.caches.flushTcc;
+    marker.invalTcc = operations.caches.invalTcc;
+    marker.flushCb = operations.caches.flushCb;
+    marker.invalCb = operations.caches.invalCb;
+    marker.flushDb = operations.caches.flushDb;
+    marker.invalDb = operations.caches.invalDb;
 
     marker.numLayoutTransitions = 0;
 
@@ -830,9 +795,9 @@ void RgpCaptureMgr::WriteBarrierEndMarker(
 
 // ================================================================================================
 // Inserts a user event string marker
-void RgpCaptureMgr::WriteUserEventMarker(
-  const VirtualGPU* gpu, RgpSqttMarkerUserEventType eventType, const std::string& name) const
-{
+void RgpCaptureMgr::WriteUserEventMarker(const VirtualGPU* gpu,
+                                         RgpSqttMarkerUserEventType eventType,
+                                         const std::string& name) const {
   memset(user_event_, 0, sizeof(RgpSqttMarkerUserEventWithString));
 
   user_event_->header.identifier = RgpSqttMarkerIdentifierUserEvent;
@@ -841,7 +806,8 @@ void RgpCaptureMgr::WriteUserEventMarker(
   size_t markerSize = sizeof(user_event_->header);
 
   if ((eventType != RgpSqttMarkerUserEventPop)) {
-    size_t strLength = std::min(name.size(), RgpSqttMaxUserEventStringLengthInDwords * sizeof(uint32_t));
+    size_t strLength =
+        std::min(name.size(), RgpSqttMaxUserEventStringLengthInDwords * sizeof(uint32_t));
     for (uint32_t charIdx = 0; charIdx < strLength; ++charIdx) {
       uint32_t c = static_cast<uint32_t>(name[charIdx]);
       user_event_->stringData[charIdx / 4] |= (c << (8 * (charIdx % 4)));
@@ -859,4 +825,4 @@ void RgpCaptureMgr::WriteUserEventMarker(
 }
 
 
-}; // namespace vk
+};  // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/palgpuopen.hpp b/projects/clr/rocclr/runtime/device/pal/palgpuopen.hpp
index 52789a581e..af56f6efd3 100644
--- a/projects/clr/rocclr/runtime/device/pal/palgpuopen.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palgpuopen.hpp
@@ -34,42 +34,36 @@
 #include "gpuopen.h"
 
 // PAL forward declarations
-namespace Pal
-{
-class  ICmdBuffer;
-class  IFence;
-class  IQueueSemaphore;
+namespace Pal {
+class ICmdBuffer;
+class IFence;
+class IQueueSemaphore;
 struct PalPublicSettings;
-}
+}  // namespace Pal
 
 // GpuUtil forward declarations
-namespace GpuUtil
-{                     
+namespace GpuUtil {
 class GpaSession;
 };
 
 // GPUOpen forward declarations
-namespace DevDriver
-{
+namespace DevDriver {
 class DevDriverServer;
 class IMsgChannel;
 struct MessageBuffer;
 
-namespace DriverControlProtocol
-{
+namespace DriverControlProtocol {
 enum struct DeviceClockMode : uint32_t;
 class HandlerServer;
-}
+}  // namespace DriverControlProtocol
 
-namespace SettingsProtocol
-{
+namespace SettingsProtocol {
 class HandlerServer;
 }
 
-}
+}  // namespace DevDriver
 
-namespace pal
-{
+namespace pal {
 class Settings;
 class Device;
 class VirtualGPU;
@@ -77,8 +71,7 @@ class HSAILKernel;
 
 // ================================================================================================
 // RgpSqttMarkerIdentifier - Identifiers for RGP SQ thread-tracing markers (Table 1)
-enum RgpSqttMarkerIdentifier : uint32_t
-{
+enum RgpSqttMarkerIdentifier : uint32_t {
   RgpSqttMarkerIdentifierEvent = 0x0,
   RgpSqttMarkerIdentifierCbStart = 0x1,
   RgpSqttMarkerIdentifierCbEnd = 0x2,
@@ -98,8 +91,7 @@ enum RgpSqttMarkerIdentifier : uint32_t
 };
 
 // ================================================================================================
-enum class RgpSqttMarkerEventType : uint32_t
-{
+enum class RgpSqttMarkerEventType : uint32_t {
   CmdNDRangeKernel = 0,
   CmdScheduler = 1,
   CmdCopyBuffer = 2,
@@ -114,8 +106,7 @@ enum class RgpSqttMarkerEventType : uint32_t
 };
 
 // ================================================================================================
-enum class RgpSqqtBarrierReason : uint32_t
-{
+enum class RgpSqqtBarrierReason : uint32_t {
   Invalid = 0,
   MemDependency = 0xC0000000,
   ProfilingControl = 0xC0000001,
@@ -125,129 +116,116 @@ enum class RgpSqqtBarrierReason : uint32_t
 };
 
 // ================================================================================================
-// RgpSqttMarkerEvent - "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker.  
+// RgpSqttMarkerEvent - "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker.
 // These are generated ahead of draws or dispatches for commands that trigger generation of waves
 //  i.e. draws/dispatches (Table 4).
-struct RgpSqttMarkerEvent
-{
-  union
-  {
-    struct
-    {
-      uint32_t identifier : 4;    // Identifier for this marker
-      uint32_t extDwords : 3;     // Number of extra dwords following this marker
-      uint32_t apiType : 24;      // The API type for this command
-      uint32_t hasThreadDims : 1; // Whether thread dimensions are included
+struct RgpSqttMarkerEvent {
+  union {
+    struct {
+      uint32_t identifier : 4;     // Identifier for this marker
+      uint32_t extDwords : 3;      // Number of extra dwords following this marker
+      uint32_t apiType : 24;       // The API type for this command
+      uint32_t hasThreadDims : 1;  // Whether thread dimensions are included
     };
 
-    uint32_t     dword01;            // The first dword
+    uint32_t dword01;  // The first dword
   };
 
-  union
-  {
-    // Some information about the vertex/instance/draw register indices.  These values are not 
+  union {
+    // Some information about the vertex/instance/draw register indices.  These values are not
     // always valid because they are not available for one reason or another:
     //
     // - If vertex offset index or instance offset index are not (together) valid, they are both
     //  equal to 0
     // - If draw index is not valid, it is equal to the vertex offset index
-    struct
-    {
-      uint32_t cbID : 20; // Command buffer ID for this marker
+    struct {
+      uint32_t cbID : 20;               // Command buffer ID for this marker
       uint32_t vertexOffsetRegIdx : 4;  // SPI userdata register index for the first vertex offset
-      uint32_t instanceOffsetRegIdx : 4;  // SPI userdata register index for the first instance offset
-      uint32_t drawIndexRegIdx : 4;  // SPI userdata register index for the draw index (multi draw indirect)
+      uint32_t
+          instanceOffsetRegIdx : 4;  // SPI userdata register index for the first instance offset
+      uint32_t drawIndexRegIdx : 4;  // SPI userdata register index for the draw index (multi draw
+                                     // indirect)
     };
-    uint32_t     dword02; // The second dword
+    uint32_t dword02;  // The second dword
   };
 
-  union
-  {
-    uint32_t cmdID;      // Command index within the command buffer
-    uint32_t dword03;    // The third dword
+  union {
+    uint32_t cmdID;    // Command index within the command buffer
+    uint32_t dword03;  // The third dword
   };
 };
 
 // ================================================================================================
 // RgpSqttMarkerEventWithDims - Per-dispatch specific marker where workgroup dims are included
-struct RgpSqttMarkerEventWithDims
-{
-  RgpSqttMarkerEvent event;   // Per-draw/dispatch marker.  API type should be Dispatch, threadDim = 1
-  uint32_t           threadX; // Work group count in X
-  uint32_t           threadY; // Work group count in Y
-  uint32_t           threadZ; // Work group count in Z
+struct RgpSqttMarkerEventWithDims {
+  RgpSqttMarkerEvent
+      event;         // Per-draw/dispatch marker.  API type should be Dispatch, threadDim = 1
+  uint32_t threadX;  // Work group count in X
+  uint32_t threadY;  // Work group count in Y
+  uint32_t threadZ;  // Work group count in Z
 };
 
 // ================================================================================================
 // RgpSqttMarkerBarrierStart - "Barrier Start" RGP SQTT instrumentation marker (Table 5)
-struct RgpSqttMarkerBarrierStart
-{
-  union
-  {
-    struct
-    {
+struct RgpSqttMarkerBarrierStart {
+  union {
+    struct {
       uint32_t identifier : 4;  // Identifier for this marker
       uint32_t extDwords : 3;   // Number of extra dwords following this marker
       uint32_t cbId : 20;       // Command buffer ID within queue
       uint32_t reserved : 5;    // Reserved
     };
 
-    uint32_t     dword01;            // The first dword
+    uint32_t dword01;  // The first dword
   };
 
-  union
-  {
-    struct
-    {
+  union {
+    struct {
       uint32_t driverReason : 31;
-      uint32_t internal: 1;
+      uint32_t internal : 1;
     };
 
-    uint32_t     dword02;            // The second dword
+    uint32_t dword02;  // The second dword
   };
 };
 
 // ================================================================================================
 // RgpSqttMarkerBarrierEnd - "Barrier End" RGP SQTT instrumentation marker (Table 6)
-struct RgpSqttMarkerBarrierEnd
-{
-  union
-  {
-    struct
-    {
-      uint32_t identifier : 4;  // Identifier for this marker
-      uint32_t extDwords : 3;   // Number of extra dwords following this marker
-      uint32_t cbId : 20;       // Command buffer ID within queue
-      uint32_t waitOnEopTs : 1; // Issued EOP_TS VGT event followed by a WAIT_REG_MEM for that timestamp
-                                // to be written.  Quintessential full pipeline stall.
+struct RgpSqttMarkerBarrierEnd {
+  union {
+    struct {
+      uint32_t identifier : 4;   // Identifier for this marker
+      uint32_t extDwords : 3;    // Number of extra dwords following this marker
+      uint32_t cbId : 20;        // Command buffer ID within queue
+      uint32_t waitOnEopTs : 1;  // Issued EOP_TS VGT event followed by a WAIT_REG_MEM for that
+                                 // timestamp to be written.  Quintessential full pipeline stall.
       uint32_t vsPartialFlush : 1;  // Stall at ME waiting for all prior VS waves to complete.
       uint32_t psPartialFlush : 1;  // Stall at ME waiting for all prior PS waves to complete.
       uint32_t csPartialFlush : 1;  // Stall at ME waiting for all prior CS waves to complete.
-      uint32_t pfpSyncMe : 1;   // Stall PFP until ME is at same point in command stream.
+      uint32_t pfpSyncMe : 1;       // Stall PFP until ME is at same point in command stream.
     };
 
-    uint32_t     dword01;             // The first dword
+    uint32_t dword01;  // The first dword
   };
 
-  union
-  {
-    struct
-    {
-      uint32_t syncCpDma : 1;  // Issue dummy CP-DMA command to confirm all prior CP-DMAs have completed.
+  union {
+    struct {
+      uint32_t
+          syncCpDma : 1;  // Issue dummy CP-DMA command to confirm all prior CP-DMAs have completed.
       uint32_t invalTcp : 1;  // Invalidate the L1 vector caches.
       uint32_t invalSqI : 1;  // Invalidate the SQ instruction caches
       uint32_t invalSqK : 1;  // Invalidate the SQ constant caches (i.e. L1 scalar caches)
       uint32_t flushTcc : 1;  // Flush L2
       uint32_t invalTcc : 1;  // Invalidate L2
-      uint32_t flushCb : 1;  // Flush CB caches (including DCC, cmask, fmask)
-      uint32_t invalCb : 1;  // Invalidate CB caches (including DCC, cmask, fmask)
-      uint32_t flushDb : 1;  // Flush DB caches (including htile)
-      uint32_t invalDb : 1;  // Invalidate DB caches (including htile)
-      uint32_t numLayoutTransitions : 16; // Number of layout transitions following this packet
-      uint32_t reserved : 6;  // Reserved for future expansion.  Always 0
+      uint32_t flushCb : 1;   // Flush CB caches (including DCC, cmask, fmask)
+      uint32_t invalCb : 1;   // Invalidate CB caches (including DCC, cmask, fmask)
+      uint32_t flushDb : 1;   // Flush DB caches (including htile)
+      uint32_t invalDb : 1;   // Invalidate DB caches (including htile)
+      uint32_t numLayoutTransitions : 16;  // Number of layout transitions following this packet
+      uint32_t reserved : 6;               // Reserved for future expansion.  Always 0
     };
 
-    uint32_t  dword02;                // The second dword
+    uint32_t dword02;  // The second dword
   };
 };
 
@@ -255,33 +233,31 @@ struct RgpSqttMarkerBarrierEnd
 constexpr uint32_t RgpSqttInstrumentationSpecVersion = 1;
 
 // RGP SQTT Instrumentation Specification version for Vulkan-specific tables
-constexpr uint32_t RgpSqttInstrumentationApiVersion  = 0;
+constexpr uint32_t RgpSqttInstrumentationApiVersion = 0;
 
-// RgpSqttMarkeUserEventDataType - Data types used in RGP SQ thread-tracing markers for an user event
-enum RgpSqttMarkerUserEventType : uint32_t
-{
-    RgpSqttMarkerUserEventTrigger = 0x0,
-    RgpSqttMarkerUserEventPop = 0x1,
-    RgpSqttMarkerUserEventPush = 0x2,
-    RgpSqttMarkerUserEventObjectName = 0x3,
-    RgpSqttMarkerUserEventReserved1 = 0x4,
-    RgpSqttMarkerUserEventReserved2 = 0x5,
-    RgpSqttMarkerUserEventReserved3 = 0x6,
-    RgpSqttMarkerUserEventReserved4 = 0x7,
+// RgpSqttMarkerUserEventType - Data types used in RGP SQ thread-tracing markers for a user
+// event
+enum RgpSqttMarkerUserEventType : uint32_t {
+  RgpSqttMarkerUserEventTrigger = 0x0,
+  RgpSqttMarkerUserEventPop = 0x1,
+  RgpSqttMarkerUserEventPush = 0x2,
+  RgpSqttMarkerUserEventObjectName = 0x3,
+  RgpSqttMarkerUserEventReserved1 = 0x4,
+  RgpSqttMarkerUserEventReserved2 = 0x5,
+  RgpSqttMarkerUserEventReserved3 = 0x6,
+  RgpSqttMarkerUserEventReserved4 = 0x7,
 };
 
 // RgpSqttMarkerUserEvent - RGP SQ thread-tracing marker for an user event.
-union RgpSqttMarkerUserEvent
-{
-    struct
-    {
-        uint32_t identifier : 4;  // Identifier for this marker
-        uint32_t extDwords : 8;  // Number of extra dwords following this marker
-        uint32_t dataType : 8;  // The type for this marker
-        uint32_t reserved : 12; // reserved
-    };
+union RgpSqttMarkerUserEvent {
+  struct {
+    uint32_t identifier : 4;  // Identifier for this marker
+    uint32_t extDwords : 8;   // Number of extra dwords following this marker
+    uint32_t dataType : 8;    // The type for this marker
+    uint32_t reserved : 12;   // reserved
+  };
 
-    uint32_t dword01;                               // The first dword
+  uint32_t dword01;  // The first dword
 };
 
 constexpr uint32_t RgpSqttMarkerUserEventWordCount = 1;
@@ -289,21 +265,20 @@ constexpr uint32_t RgpSqttMarkerUserEventWordCount = 1;
 // The max lengths of frame marker strings
 static constexpr size_t RgpSqttMaxUserEventStringLengthInDwords = 1024;
 
-// RgpSqttMarkerUserEvent - RGP SQ thread-tracing marker for an user event with a string (push and trigger data types)
-struct RgpSqttMarkerUserEventWithString
-{
-    RgpSqttMarkerUserEvent header;
+// RgpSqttMarkerUserEventWithString - RGP SQ thread-tracing marker for a user event with a string
+// (push and trigger data types)
+struct RgpSqttMarkerUserEventWithString {
+  RgpSqttMarkerUserEvent header;
 
-    uint32_t stringLength;                                        // Length of the string (in characters)
-    uint32_t stringData[RgpSqttMaxUserEventStringLengthInDwords]; // String data in UTF-8 format
+  uint32_t stringLength;  // Length of the string (in characters)
+  uint32_t stringData[RgpSqttMaxUserEventStringLengthInDwords];  // String data in UTF-8 format
 };
 
 // ================================================================================================
 // This class provides functionality to interact with the GPU Open Developer Mode message passing
 // service and the rest of the driver.
-class RgpCaptureMgr
-{
-public:
+class RgpCaptureMgr {
+ public:
   ~RgpCaptureMgr();
 
   static RgpCaptureMgr* Create(Pal::IPlatform* platform, const Device& device);
@@ -321,45 +296,42 @@ public:
 
   bool IsQueueTimingActive() const;
 
-  void WriteBarrierStartMarker(
-    const VirtualGPU* gpu, const Pal::Developer::BarrierData& data) const;
-  void WriteBarrierEndMarker(
-    const VirtualGPU* gpu, const Pal::Developer::BarrierData& data) const;
+  void WriteBarrierStartMarker(const VirtualGPU* gpu,
+                               const Pal::Developer::BarrierData& data) const;
+  void WriteBarrierEndMarker(const VirtualGPU* gpu, const Pal::Developer::BarrierData& data) const;
   bool RegisterTimedQueue(uint32_t queue_id, Pal::IQueue* iQueue, bool* debug_vmid) const;
-  Pal::Result TimedQueueSubmit(
-    Pal::IQueue* queue, uint64_t cmdId, const Pal::SubmitInfo& submitInfo) const;
+  Pal::Result TimedQueueSubmit(Pal::IQueue* queue, uint64_t cmdId,
+                               const Pal::SubmitInfo& submitInfo) const;
 
-private:
+ private:
   // Steps that an RGP trace goes through
-  enum class TraceStatus
-  {
-      Idle = 0,           // No active trace and none requested
-      Preparing,          // A trace has been requested but is not active yet because we are
-                          // currently sampling timing information over some number of lead frames.
-      Running,            // SQTT and queue timing is currently active for all command buffer submits.
-      WaitingForSqtt,
-      WaitingForResults   // Tracing is no longer active, but all results are not yet ready.
+  enum class TraceStatus {
+    Idle = 0,   // No active trace and none requested
+    Preparing,  // A trace has been requested but is not active yet because we are
+                // currently sampling timing information over some number of lead frames.
+    Running,    // SQTT and queue timing is currently active for all command buffer submits.
+    WaitingForSqtt,
+    WaitingForResults  // Tracing is no longer active, but all results are not yet ready.
   };
 
   // All per-device state to support RGP tracing
-  struct TraceState
-  {
-    TraceStatus   status_;              // Current trace status (idle, running, etc.)
+  struct TraceState {
+    TraceStatus status_;  // Current trace status (idle, running, etc.)
 
-    GpuEvent      begin_sqtt_event_;    // Event that is signaled when a trace-end cmdbuf retires
-    GpuEvent      end_sqtt_event_;      // Event that is signaled when a trace-end cmdbuf retires
-    GpuEvent      end_event_;           // Event that is signaled when a trace-end cmdbuf retires
+    GpuEvent begin_sqtt_event_;  // Event that is signaled when a trace-end cmdbuf retires
+    GpuEvent end_sqtt_event_;    // Event that is signaled when a trace-end cmdbuf retires
+    GpuEvent end_event_;         // Event that is signaled when a trace-end cmdbuf retires
 
-    VirtualGPU*   prepare_queue_;       // The queue that triggered the full start of a trace
-    VirtualGPU*   begin_queue_;         // The queue that triggered starting SQTT
+    VirtualGPU* prepare_queue_;  // The queue that triggered the full start of a trace
+    VirtualGPU* begin_queue_;    // The queue that triggered starting SQTT
 
-    GpuUtil::GpaSession*  gpa_session_; // GPA session helper object for building RGP data
-    uint32_t      gpa_sample_id_;       // Sample ID associated with the current trace
-    bool          queue_timing_;        // Queue timing is enabled
+    GpuUtil::GpaSession* gpa_session_;  // GPA session helper object for building RGP data
+    uint32_t gpa_sample_id_;            // Sample ID associated with the current trace
+    bool queue_timing_;                 // Queue timing is enabled
 
-    uint32_t      prepared_disp_count_; // Number of dispatches counted while preparing for a trace
-    uint32_t      sqtt_disp_count_;     // Number of dispatches counted while SQTT tracing is active
-    mutable uint32_t current_event_id_; // Current event ID
+    uint32_t prepared_disp_count_;  // Number of dispatches counted while preparing for a trace
+    uint32_t sqtt_disp_count_;      // Number of dispatches counted while SQTT tracing is active
+    mutable uint32_t current_event_id_;  // Current event ID
   };
 
   RgpCaptureMgr(Pal::IPlatform* platform, const Device& device);
@@ -374,25 +346,25 @@ private:
   static bool GpuSupportsTracing(const Pal::DeviceProperties& props, const Settings& settings);
   RgpSqttMarkerEvent BuildEventMarker(const VirtualGPU* gpu, RgpSqttMarkerEventType api_type) const;
   void WriteMarker(const VirtualGPU* gpu, const void* data, size_t data_size) const;
-  void WriteEventWithDimsMarker(const VirtualGPU* gpu, RgpSqttMarkerEventType apiType,
-    uint32_t x, uint32_t y, uint32_t z) const;
+  void WriteEventWithDimsMarker(const VirtualGPU* gpu, RgpSqttMarkerEventType apiType, uint32_t x,
+                                uint32_t y, uint32_t z) const;
   void WriteUserEventMarker(const VirtualGPU* gpu, RgpSqttMarkerUserEventType eventType,
-    const std::string& name) const;
+                            const std::string& name) const;
 
-  const Device&               device_;
+  const Device& device_;
   DevDriver::DevDriverServer* dev_driver_server_;
   DevDriver::RGPProtocol::RGPServer* rgp_server_;
-  mutable amd::Monitor        trace_mutex_;
-  TraceState                  trace_;
+  mutable amd::Monitor trace_mutex_;
+  TraceState trace_;
   RgpSqttMarkerUserEventWithString* user_event_;
 
-  uint32_t  num_prep_disp_;
-  uint32_t  max_sqtt_disp_;       // Maximum number of the dispatches allowed in the trace
-  uint32_t  trace_gpu_mem_limit_;
-  uint32_t  global_disp_count_;
+  uint32_t num_prep_disp_;
+  uint32_t max_sqtt_disp_;  // Maximum number of the dispatches allowed in the trace
+  uint32_t trace_gpu_mem_limit_;
+  uint32_t global_disp_count_;
 
-  bool  trace_enabled_;         // True if tracing is currently enabled (master flag)
-  bool  inst_tracing_enabled_;  // Enable instruction-level SQTT tokens
+  bool trace_enabled_;         // True if tracing is currently enabled (master flag)
+  bool inst_tracing_enabled_;  // Enable instruction-level SQTT tokens
 
   PAL_DISALLOW_DEFAULT_CTOR(RgpCaptureMgr);
   PAL_DISALLOW_COPY_AND_ASSIGN(RgpCaptureMgr);
@@ -400,11 +372,9 @@ private:
 
 // ================================================================================================
 // Returns true if queue operations are currently being timed by RGP traces.
-inline bool RgpCaptureMgr::IsQueueTimingActive() const
-{
+inline bool RgpCaptureMgr::IsQueueTimingActive() const {
   return (trace_.queue_timing_ &&
-          (trace_.status_ == TraceStatus::Running ||
-           trace_.status_ == TraceStatus::Preparing ||
+          (trace_.status_ == TraceStatus::Running || trace_.status_ == TraceStatus::Preparing ||
            trace_.status_ == TraceStatus::WaitingForSqtt));
 }
-};
+};  // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
index e23389876b..7a4823ddaa 100644
--- a/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palkernel.cpp
@@ -27,11 +27,9 @@ typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
 namespace pal {
 
 void HSAILKernel::setWorkGroupInfo(const uint32_t privateSegmentSize,
-                                   const uint32_t groupSegmentSize,
-                                   const uint16_t numSGPRs,
+                                   const uint32_t groupSegmentSize, const uint16_t numSGPRs,
                                    const uint16_t numVGPRs) {
-  workGroupInfo_.scratchRegs_ =
-      amd::alignUp(privateSegmentSize, 16) / sizeof(uint);
+  workGroupInfo_.scratchRegs_ = amd::alignUp(privateSegmentSize, 16) / sizeof(uint);
   workGroupInfo_.privateMemSize_ = privateSegmentSize;
   workGroupInfo_.localMemSize_ = workGroupInfo_.usedLDSSize_ = groupSegmentSize;
   workGroupInfo_.usedSGPRs_ = numSGPRs;
@@ -63,13 +61,13 @@ bool HSAILKernel::setKernelCode(amd::hsa::loader::Symbol* sym, amd_kernel_code_t
   }
 
   // Copy code object of this kernel from the program CPU segment
-  memcpy(akc, reinterpret_cast<void*>(prog().findHostKernelAddress(code_)), sizeof(amd_kernel_code_t));
+  memcpy(akc, reinterpret_cast<void*>(prog().findHostKernelAddress(code_)),
+         sizeof(amd_kernel_code_t));
 
   return true;
 }
 
 bool HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol* sym) {
-
   amd_kernel_code_t* akc = &akc_;
 
   if (!setKernelCode(sym, akc)) {
@@ -77,18 +75,16 @@ bool HSAILKernel::aqlCreateHWInfo(amd::hsa::loader::Symbol* sym) {
   }
 
   if (!sym->GetInfo(HSA_EXT_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT_SIZE,
-    reinterpret_cast<void*>(&codeSize_))) {
+                    reinterpret_cast<void*>(&codeSize_))) {
     return false;
   }
 
-    // Setup the the workgroup info
-  setWorkGroupInfo(akc->workitem_private_segment_byte_size,
-                   akc->workgroup_group_segment_byte_size,
-                   akc->wavefront_sgpr_count,
-                   akc->workitem_vgpr_count);
+  // Set up the workgroup info
+  setWorkGroupInfo(akc->workitem_private_segment_byte_size, akc->workgroup_group_segment_byte_size,
+                   akc->wavefront_sgpr_count, akc->workitem_vgpr_count);
 
   workgroupGroupSegmentByteSize_ = workGroupInfo_.usedLDSSize_;
-  kernargSegmentByteSize_ =  akc->kernarg_segment_byte_size;
+  kernargSegmentByteSize_ = akc->kernarg_segment_byte_size;
   spillSegmentByteSize_ = amd::alignUp(workGroupInfo_.privateMemSize_, sizeof(uint32_t));
 
   return true;
@@ -102,16 +98,14 @@ HSAILKernel::HSAILKernel(std::string name, HSAILProgram* prog, std::string compi
       codeSize_(0),
       workgroupGroupSegmentByteSize_(0),
       kernargSegmentByteSize_(0),
-      spillSegmentByteSize_(0)
- {
+      spillSegmentByteSize_(0) {
   flags_.hsa_ = true;
 }
 
-HSAILKernel::~HSAILKernel() {
-}
+HSAILKernel::~HSAILKernel() {}
 
 bool HSAILKernel::init(amd::hsa::loader::Symbol* sym, bool finalize) {
-#if  defined(WITH_COMPILER_LIB)
+#if defined(WITH_COMPILER_LIB)
   acl_error error = ACL_SUCCESS;
   std::string openClKernelName = openclMangledName(name());
   flags_.internalKernel_ =
@@ -274,12 +268,14 @@ const HSAILProgram& HSAILKernel::prog() const {
   return reinterpret_cast<const HSAILProgram&>(prog_);
 }
 
-hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(
-    VirtualGPU& gpu, const amd::Kernel& kernel, const amd::NDRangeContainer& sizes,
-    const_address parameters, size_t ldsAddress, uint64_t vmDefQueue, uint64_t* vmParentWrap) const {
+hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(VirtualGPU& gpu, const amd::Kernel& kernel,
+                                                         const amd::NDRangeContainer& sizes,
+                                                         const_address parameters,
+                                                         size_t ldsAddress, uint64_t vmDefQueue,
+                                                         uint64_t* vmParentWrap) const {
   uint64_t argList;
   address aqlArgBuf = gpu.managedBuffer().reserve(
-    argsBufferSize() + sizeof(hsa_kernel_dispatch_packet_t), &argList);
+      argsBufferSize() + sizeof(hsa_kernel_dispatch_packet_t), &argList);
   gpu.addVmMemory(gpu.managedBuffer().activeMemory());
 
   if (dynamicParallelism()) {
@@ -307,8 +303,8 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(
         break;
       case amd::KernelParameterDescriptor::HiddenGlobalOffsetY:
         if (sizes.dimensions() >= 2) {
-            offset = sizes.offset()[1];
-            WriteAqlArgAt(const_cast<address>(parameters), &offset, it.size_, it.offset_);
+          offset = sizes.offset()[1];
+          WriteAqlArgAt(const_cast<address>(parameters), &offset, it.size_, it.offset_);
         }
         break;
       case amd::KernelParameterDescriptor::HiddenGlobalOffsetZ:
@@ -322,8 +318,7 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(
             // and printf buffer was allocated
             (gpu.printfDbgHSA().dbgBuffer() != nullptr)) {
           // and set the fourth argument as the printf_buffer pointer
-          size_t bufferPtr = static_cast<size_t>(gpu.printfDbgHSA().
-            dbgBuffer()->vmAddress());
+          size_t bufferPtr = static_cast<size_t>(gpu.printfDbgHSA().dbgBuffer()->vmAddress());
           gpu.addVmMemory(gpu.printfDbgHSA().dbgBuffer());
           WriteAqlArgAt(const_cast<address>(parameters), &bufferPtr, it.size_, it.offset_);
         }
@@ -346,11 +341,11 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(
   // Note: In a case of structs the size won't match,
   // since HSAIL compiler expects a reference...
   assert(argsBufferSize() <= signature.paramsSize() &&
-    "A mismatch of sizes of arguments between compiler and runtime!");
+         "A mismatch of sizes of arguments between compiler and runtime!");
 
-  //hsa_kernel_dispatch_packet_t disp;
-  hsa_kernel_dispatch_packet_t* hsaDisp = reinterpret_cast<hsa_kernel_dispatch_packet_t*>(
-    gpu.cb(0)->SysMemCopy());
+  // hsa_kernel_dispatch_packet_t disp;
+  hsa_kernel_dispatch_packet_t* hsaDisp =
+      reinterpret_cast<hsa_kernel_dispatch_packet_t*>(gpu.cb(0)->SysMemCopy());
 
   amd::NDRange local(sizes.local());
   const amd::NDRange& global = sizes.global();
@@ -359,10 +354,10 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(
   FindLocalWorkSize(sizes.dimensions(), sizes.global(), local);
 
   constexpr uint16_t kDispatchPacketHeader =
-    (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
-    (1 << HSA_PACKET_HEADER_BARRIER) |
-    (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
-    (HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
+      (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
+      (1 << HSA_PACKET_HEADER_BARRIER) |
+      (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
+      (HSA_FENCE_SCOPE_AGENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
 
   hsaDisp->header = kDispatchPacketHeader;
   hsaDisp->setup = sizes.dimensions();
@@ -387,7 +382,7 @@ hsa_kernel_dispatch_packet_t* HSAILKernel::loadArguments(
   memcpy(aqlArgBuf + argsBufferSize(), hsaDisp, sizeof(hsa_kernel_dispatch_packet_t));
 
   if (AMD_HSA_BITS_GET(akc_.kernel_code_properties,
-      AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR)) {
+                       AMD_KERNEL_CODE_PROPERTIES_ENABLE_SGPR_QUEUE_PTR)) {
     gpu.addVmMemory(gpu.hsaQueueMem());
   }
 
@@ -407,7 +402,7 @@ static const KernelMD* FindKernelMetadata(const CodeObjectMD* programMD, const s
   }
   return nullptr;
 }
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
 
 #if defined(USE_COMGR_LIBRARY)
 bool LightningKernel::init() {
@@ -419,7 +414,7 @@ bool LightningKernel::init() {
     return false;
   }
 
-  KernelMD  kernelMD;
+  KernelMD kernelMD;
   if (!GetAttrCodePropMetadata(*kernelMetaNode, &kernelMD)) {
     return false;
   }
@@ -427,8 +422,8 @@ bool LightningKernel::init() {
   symbolName_ = (codeObjectVer() == 2) ? name() : kernelMD.mSymbolName;
 
   workgroupGroupSegmentByteSize_ = kernelMD.mCodeProps.mGroupSegmentFixedSize;
-  spillSegmentByteSize_ = amd::alignUp(kernelMD.mCodeProps.mPrivateSegmentFixedSize,
-                                       sizeof(uint32_t));
+  spillSegmentByteSize_ =
+      amd::alignUp(kernelMD.mCodeProps.mPrivateSegmentFixedSize, sizeof(uint32_t));
   kernargSegmentByteSize_ = kernelMD.mCodeProps.mKernargSegmentSize;
 
   // Copy codeobject of this kernel from the program CPU segment
@@ -451,7 +446,7 @@ bool LightningKernel::init() {
 
     // Get the runtime handle symbol GPU address
     rth_symbol = prog().GetSymbol(const_cast<char*>(kernelMD.mAttrs.mRuntimeHandle.c_str()),
-                                const_cast<hsa_agent_t*>(&agent));
+                                  const_cast<hsa_agent_t*>(&agent));
     uint64_t symbol_address;
     rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
 
@@ -461,19 +456,14 @@ bool LightningKernel::init() {
     uint64_t kernel_object = gpuAqlCode();
     VirtualGPU* gpu = codeSegGpu.dev().xferQueue();
 
-    const struct RuntimeHandle runtime_handle = {
-        gpuAqlCode(),
-        spillSegSize(),
-        ldsSize()
-    };
+    const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()};
 
     codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true);
   }
 
   // Setup the the workgroup info
   setWorkGroupInfo(kernelMD.mCodeProps.mPrivateSegmentFixedSize,
-                   kernelMD.mCodeProps.mGroupSegmentFixedSize,
-                   kernelMD.mCodeProps.mNumSGPRs,
+                   kernelMD.mCodeProps.mGroupSegmentFixedSize, kernelMD.mCodeProps.mNumSGPRs,
                    kernelMD.mCodeProps.mNumVGPRs);
 
   // Copy wavefront size
@@ -499,10 +489,10 @@ bool LightningKernel::init() {
 
   return true;
 }
-#endif // defined(USE_COMGR_LIBRARY)
+#endif  // defined(USE_COMGR_LIBRARY)
 
 bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
-#if defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY)
+#if defined(WITH_LIGHTNING_COMPILER) && !defined(USE_COMGR_LIBRARY)
   flags_.internalKernel_ =
       (compileOptions_.find("-cl-internal-kernel") != std::string::npos) ? true : false;
 
@@ -545,7 +535,7 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
 
     // Get the runtime handle symbol GPU address
     rth_symbol = prog().GetSymbol(const_cast<char*>(kernelMD->mAttrs.mRuntimeHandle.c_str()),
-                                const_cast<hsa_agent_t*>(&agent));
+                                  const_cast<hsa_agent_t*>(&agent));
     uint64_t symbol_address;
     rth_symbol->GetInfo(HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &symbol_address);
 
@@ -554,11 +544,7 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
     uint64_t offset = symbol_address - codeSegGpu.vmAddress();
     VirtualGPU* gpu = codeSegGpu.dev().xferQueue();
 
-    const struct RuntimeHandle runtime_handle = {
-        gpuAqlCode(),
-        spillSegSize(),
-        ldsSize()
-    };
+    const struct RuntimeHandle runtime_handle = {gpuAqlCode(), spillSegSize(), ldsSize()};
 
     codeSegGpu.writeRawData(*gpu, offset, sizeof(runtime_handle), &runtime_handle, true);
   }
@@ -584,7 +570,7 @@ bool LightningKernel::init(amd::hsa::loader::Symbol* symbol) {
 
   waveLimiter_.enable();
   */
-#endif // defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY)
+#endif  // defined(WITH_LIGHTNING_COMPILER) && ! defined(USE_COMGR_LIBRARY)
   return true;
 }
 
diff --git a/projects/clr/rocclr/runtime/device/pal/palkernel.hpp b/projects/clr/rocclr/runtime/device/pal/palkernel.hpp
index 5a1abe07d4..926d2deccc 100644
--- a/projects/clr/rocclr/runtime/device/pal/palkernel.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palkernel.hpp
@@ -20,14 +20,14 @@ namespace amd {
 namespace hsa {
 namespace loader {
 class Symbol;
-}  // loader
+}  // namespace loader
 namespace code {
 namespace Kernel {
 class Metadata;
-}  // Kernel
-}  // code
-}  // hsa
-}  // amd
+}  // namespace Kernel
+}  // namespace code
+}  // namespace hsa
+}  // namespace amd
 
 //! \namespace pal PAL Device Implementation
 namespace pal {
@@ -43,7 +43,6 @@ class LightningProgram;
  */
 class HSAILKernel : public device::Kernel {
  public:
-
   HSAILKernel(std::string name, HSAILProgram* prog, std::string compileOptions);
 
   virtual ~HSAILKernel();
@@ -106,21 +105,19 @@ class HSAILKernel : public device::Kernel {
   bool setKernelCode(amd::hsa::loader::Symbol* sym, amd_kernel_code_t* akc);
 
   //! Set up the workgroup info based on the kernel metadata
-  void setWorkGroupInfo(const uint32_t privateSegmentSize,
-                        const uint32_t groupSegmentSize,
-                        const uint16_t numSGPRs,
-                        const uint16_t numVGPRs);
+  void setWorkGroupInfo(const uint32_t privateSegmentSize, const uint32_t groupSegmentSize,
+                        const uint16_t numSGPRs, const uint16_t numVGPRs);
 
-  std::string compileOptions_;    //!< compile used for finalizing this kernel
-  amd_kernel_code_t akc_;         //!< AQL kernel code on CPU
-  uint index_;                    //!< Kernel index in the program
+  std::string compileOptions_;  //!< compile used for finalizing this kernel
+  amd_kernel_code_t akc_;       //!< AQL kernel code on CPU
+  uint index_;                  //!< Kernel index in the program
 
-  uint64_t code_;     //!< GPU memory pointer to the kernel
-  size_t codeSize_;   //!< Size of ISA code
+  uint64_t code_;    //!< GPU memory pointer to the kernel
+  size_t codeSize_;  //!< Size of ISA code
 
-  uint32_t workgroupGroupSegmentByteSize_;    //!< LDS size used in the kernel
-  uint32_t kernargSegmentByteSize_;           //!< Size of kernel argument buffer
-  uint32_t spillSegmentByteSize_;             //!< Spill reg size per workitem
+  uint32_t workgroupGroupSegmentByteSize_;  //!< LDS size used in the kernel
+  uint32_t kernargSegmentByteSize_;         //!< Size of kernel argument buffer
+  uint32_t spillSegmentByteSize_;           //!< Spill reg size per workitem
 };
 
 class LightningKernel : public HSAILKernel {
@@ -140,4 +137,5 @@ class LightningKernel : public HSAILKernel {
 #endif
 };
 
-/*@}*/} // namespace pal
+/*@}*/
+}  // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/palmemory.cpp b/projects/clr/rocclr/runtime/device/pal/palmemory.cpp
index 071f17962a..bad5652845 100644
--- a/projects/clr/rocclr/runtime/device/pal/palmemory.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palmemory.cpp
@@ -23,27 +23,21 @@
 namespace pal {
 
 Memory::Memory(const Device& gpuDev, amd::Memory& owner, size_t size)
-  : device::Memory(owner), Resource(gpuDev, size)
-  , pinnedMemory_(nullptr)
-  , parent_(nullptr) {
-
+    : device::Memory(owner), Resource(gpuDev, size), pinnedMemory_(nullptr), parent_(nullptr) {
   if (owner.parent() != nullptr) {
     flags_ |= SubMemoryObject;
   }
 }
 
 Memory::Memory(const Device& gpuDev, size_t size)
-  : device::Memory(size), Resource(gpuDev, size)
-  , pinnedMemory_(nullptr)
-  , parent_(nullptr) {
-}
+    : device::Memory(size), Resource(gpuDev, size), pinnedMemory_(nullptr), parent_(nullptr) {}
 
 Memory::Memory(const Device& gpuDev, amd::Memory& owner, size_t width, size_t height, size_t depth,
                cl_image_format format, cl_mem_object_type imageType, uint mipLevels)
-    : device::Memory(owner), Resource(gpuDev, width, height, depth, format, imageType, mipLevels)
-    , pinnedMemory_(nullptr)
-    , parent_(nullptr) {
-
+    : device::Memory(owner),
+      Resource(gpuDev, width, height, depth, format, imageType, mipLevels),
+      pinnedMemory_(nullptr),
+      parent_(nullptr) {
   if (owner.parent() != nullptr) {
     flags_ |= SubMemoryObject;
   }
@@ -51,10 +45,10 @@ Memory::Memory(const Device& gpuDev, amd::Memory& owner, size_t width, size_t he
 
 Memory::Memory(const Device& gpuDev, size_t size, size_t width, size_t height, size_t depth,
                cl_image_format format, cl_mem_object_type imageType, uint mipLevels)
-  : device::Memory(size), Resource(gpuDev, width, height, depth, format, imageType, mipLevels)
-  , pinnedMemory_(nullptr)
-  , parent_(nullptr) {
-}
+    : device::Memory(size),
+      Resource(gpuDev, width, height, depth, format, imageType, mipLevels),
+      pinnedMemory_(nullptr),
+      parent_(nullptr) {}
 
 #ifdef _WIN32
 static HANDLE getSharedHandle(IUnknown* pIface) {
@@ -130,7 +124,7 @@ bool Memory::create(Resource::MemoryType memType, Resource::CreateParams* params
         break;
       case Resource::Remote:
       case Resource::RemoteUSWC:
-          if ((!desc().tiled_) && (desc().dimSize_ != 3)) {
+        if ((!desc().tiled_) && (desc().dimSize_ != 3)) {
           // Marks memory object for direct GPU access to the host memory
           flags_ |= HostMemoryDirectAccess;
         }
@@ -402,7 +396,7 @@ Memory::~Memory() {
       (memoryType() != Resource::ExternalPhysical)) {
     // Unmap memory if direct access was requested
     // Note: runtime will perform unmap on the actual resource destruction
-    //unmap(nullptr);
+    // unmap(nullptr);
   }
 }
 
diff --git a/projects/clr/rocclr/runtime/device/pal/palmemory.hpp b/projects/clr/rocclr/runtime/device/pal/palmemory.hpp
index d84b23cbe6..2ce3062cce 100644
--- a/projects/clr/rocclr/runtime/device/pal/palmemory.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palmemory.hpp
@@ -32,12 +32,12 @@ class Memory : public device::Memory, public Resource {
   Memory(const Device& gpuDev,  //!< GPU device object
          amd::Memory& owner,    //!< Abstraction layer memory object
          size_t size            //!< Memory size for allocation
-         );
+  );
 
   //! Constructor (nonfat version for local scratch mem use without heap block)
   Memory(const Device& gpuDev,  //!< GPU device object
          size_t size            //!< Memory size for allocation
-         );
+  );
 
   //! Constructor memory for images (without global heap allocation)
   Memory(const Device& gpuDev,          //!< GPU device object
@@ -48,7 +48,7 @@ class Memory : public device::Memory, public Resource {
          cl_image_format format,        //!< Memory format
          cl_mem_object_type imageType,  //!< CL image type
          uint mipLevels                 //!< The number of mip levels
-         );
+  );
 
   //! Constructor memory for images (without global heap allocation)
   Memory(const Device& gpuDev,          //!< GPU device object
@@ -59,7 +59,7 @@ class Memory : public device::Memory, public Resource {
          cl_image_format format,        //!< Memory format
          cl_mem_object_type imageType,  //!< CL image type
          uint mipLevels                 //!< The number of mip levels
-         );
+  );
 
   //! Default destructor
   ~Memory();
@@ -70,7 +70,7 @@ class Memory : public device::Memory, public Resource {
   //! Overloads the resource create method
   virtual bool create(Resource::MemoryType memType,          //!< Memory type
                       Resource::CreateParams* params = NULL  //!< Prameters for create
-                      );
+  );
 
   //! Allocate memory for API-level maps
   virtual void* allocMapTarget(const amd::Coord3D& origin,  //!< The map location in memory
@@ -78,12 +78,12 @@ class Memory : public device::Memory, public Resource {
                                uint mapFlags,               //!< Map flags
                                size_t* rowPitch = NULL,     //!< Row pitch for the mapped memory
                                size_t* slicePitch = NULL    //!< Slice for the mapped memory
-                               );
+  );
 
   //! Pins system memory associated with this memory object
   virtual bool pinSystemMemory(void* hostPtr,  //!< System memory address
                                size_t size     //!< Size of allocated system memory
-                               );
+  );
 
   //! Releases indirect map surface
   virtual void releaseIndirectMap() { decIndMapCount(); }
@@ -96,15 +96,15 @@ class Memory : public device::Memory, public Resource {
                        uint numLayers = 0,        //!< End layer for multilayer map
                        size_t* rowPitch = NULL,   //!< Row pitch for the device memory
                        size_t* slicePitch = NULL  //!< Slice pitch for the device memory
-                       );
+  );
 
   //! Unmap the device memory
   virtual void cpuUnmap(device::VirtualDevice& vDev  //!< Virtual device for unmap operaiton
-                        );
+  );
 
   //! Updates device memory from the owner's host allocation
   void syncCacheFromHost(VirtualGPU& gpu,  //!< Virtual GPU device object
-                         //! Synchronization flags
+                                           //! Synchronization flags
                          device::Memory::SyncFlags syncFlags = device::Memory::SyncFlags());
 
   //! Updates the owner's host allocation from device memory
@@ -115,11 +115,13 @@ class Memory : public device::Memory, public Resource {
   //! Creates a view from current resource
   virtual Memory* createBufferView(
       amd::Memory& subBufferOwner  //!< The abstraction layer subbuf owner
-      );
+  );
 
   virtual uint64_t virtualAddress() const override { return vmAddress(); }
 
-  virtual const address cpuSrd() const { return reinterpret_cast<const address>(const_cast<void*>(hwState())); }
+  virtual const address cpuSrd() const {
+    return reinterpret_cast<const address>(const_cast<void*>(hwState()));
+  }
 
   //! Allocates host memory for synchronization with MGPU context
   void mgpuCacheWriteBack();
@@ -161,8 +163,8 @@ class Memory : public device::Memory, public Resource {
   //! Disable operator=
   Memory& operator=(const Memory&);
 
-  Memory* pinnedMemory_;     //!< Memory used as pinned system memory
-  const Memory* parent_;     //!< Parent memory object
+  Memory* pinnedMemory_;  //!< Memory used as pinned system memory
+  const Memory* parent_;  //!< Parent memory object
 };
 
 class Buffer : public pal::Memory {
@@ -219,7 +221,7 @@ class Image : public pal::Memory {
                                uint mapFlags,               //!< Map flags
                                size_t* rowPitch = NULL,     //!< Row pitch for the mapped memory
                                size_t* slicePitch = NULL    //!< Slice for the mapped memory
-                               );
+  );
 
   virtual uint64_t virtualAddress() const override { return hwSrd(); }
 
diff --git a/projects/clr/rocclr/runtime/device/pal/palprintf.hpp b/projects/clr/rocclr/runtime/device/pal/palprintf.hpp
index edb8077161..69dd871300 100644
--- a/projects/clr/rocclr/runtime/device/pal/palprintf.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palprintf.hpp
@@ -11,7 +11,7 @@
 #ifndef isinf
 #ifdef _MSC_VER
 #define isinf(X) (!_finite(X) && !_isnan(X))
-#else   //!_MSC_VER
+#else  //!_MSC_VER
 #define isinf(X) (std::isinf(X))
 #endif  //!_MSC_VER
 #endif  // isinf
@@ -19,7 +19,7 @@
 #ifndef isnan
 #ifdef _MSC_VER
 #define isnan(X) (_isnan(X))
-#else   //!_MSC_VER
+#else  //!_MSC_VER
 #define isnan(X) (std::isnan(X))
 #endif  //!_MSC_VER
 #endif  // isnan
@@ -55,14 +55,14 @@ class PrintfDbg : public amd::HeapObject {
   bool init(VirtualGPU& gpu,          //!< Virtual GPU object
             bool printfEnabled,       //!< checks for printf
             const amd::NDRange& size  //!< Kernel's workload
-            );
+  );
 
   //! Prints the kernel's debug informaiton from the buffer
-  bool output(VirtualGPU& gpu,                           //!< Virtual GPU object
-              bool printfEnabled,                        //!< checks for printf
-              const amd::NDRange& size,                  //!< Kernel's workload
+  bool output(VirtualGPU& gpu,                                   //!< Virtual GPU object
+              bool printfEnabled,                                //!< checks for printf
+              const amd::NDRange& size,                          //!< Kernel's workload
               const std::vector<device::PrintfInfo>& printfInfo  //!< printf info
-              );
+  );
 
   //! Debug buffer size per workitem
   size_t wiDbgSize() const { return wiDbgSize_; }
@@ -81,7 +81,7 @@ class PrintfDbg : public amd::HeapObject {
 
   //! Allocates the debug buffer
   bool allocate(bool realloc = false  //!< If TRUE then reallocate the debug memory
-                );
+  );
 
   //! Returns TRUE if a float value has to be printed
   bool checkFloat(const std::string& fmt  //!< Format string
@@ -105,9 +105,9 @@ class PrintfDbg : public amd::HeapObject {
                         ) const;
 
   //! Displays the PrintfDbg
-  void outputDbgBuffer(const device::PrintfInfo& info,//!< printf info
-                       const uint32_t* workitemData,  //!< The PrintfDbg dump buffer
-                       size_t& i                      //!< index to the data in the buffer
+  void outputDbgBuffer(const device::PrintfInfo& info,  //!< printf info
+                       const uint32_t* workitemData,    //!< The PrintfDbg dump buffer
+                       size_t& i                        //!< index to the data in the buffer
                        ) const;
 
  private:
@@ -127,7 +127,7 @@ class PrintfDbg : public amd::HeapObject {
   uint32_t* mapWorkitem(VirtualGPU& gpu,  //!< Virtual GPU object
                         size_t idx,       //!< Workitem global index
                         bool* realloc     //!< Returns TRUE if workitem reached the buffer limit
-                        );
+  );
 
   //! Unamp the staged buffer
   void unmapWorkitem(VirtualGPU& gpu,              //!< Virtual GPU object
@@ -145,13 +145,13 @@ class PrintfDbgHSA : public PrintfDbg {
   //! Initializes the debug buffer before kernel's execution
   bool init(VirtualGPU& gpu,    //!< Virtual GPU object
             bool printfEnabled  //!< checks for printf
-            );
+  );
 
   //! Prints the kernel's debug informaiton from the buffer
-  bool output(VirtualGPU& gpu,                           //!< Virtual GPU object
-              bool printfEnabled,                        //!< checks for printf
+  bool output(VirtualGPU& gpu,                                   //!< Virtual GPU object
+              bool printfEnabled,                                //!< checks for printf
               const std::vector<device::PrintfInfo>& printfInfo  //!< printf info
-              );
+  );
 
  private:
   //! Disable copy constructor
@@ -161,4 +161,5 @@ class PrintfDbgHSA : public PrintfDbg {
   PrintfDbgHSA& operator=(const PrintfDbgHSA&);
 };
 
-/*@}*/} // namespace pal
+/*@}*/
+}  // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/palprogram.cpp b/projects/clr/rocclr/runtime/device/pal/palprogram.cpp
index ed788bda56..85d404e897 100644
--- a/projects/clr/rocclr/runtime/device/pal/palprogram.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palprogram.cpp
@@ -65,10 +65,10 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t
   align = amd::alignUp(align, sizeof(uint32_t));
 
   amd::Memory* amd_mem_obj = new (prog.dev().context())
-    amd::Buffer(prog.dev().context(), 0, amd::alignUp(size, align),
-    // HIP requires SVM allocation for segment code due to possible global variable access and
-    // global variables are a part of code segment with the latest loader
-    amd::IS_HIP ? reinterpret_cast<void*>(1) : nullptr);
+      amd::Buffer(prog.dev().context(), 0, amd::alignUp(size, align),
+                  // HIP requires SVM allocation for segment code due to possible global variable
+                  // access and global variables are a part of code segment with the latest loader
+                  amd::IS_HIP ? reinterpret_cast<void*>(1) : nullptr);
 
   if (amd_mem_obj == nullptr) {
     LogError("[OCL] failed to create a mem object!");
@@ -103,9 +103,9 @@ bool Segment::alloc(HSAILProgram& prog, amdgpu_hsa_elf_segment_t segment, size_t
 
   if (zero && !prog.isInternal()) {
     uint64_t pattern = 0;
-    size_t   patternSize = ((size % sizeof(pattern)) == 0) ? sizeof(pattern) : 1;
-    prog.dev().xferMgr().fillBuffer(*gpuAccess_, &pattern, patternSize,
-        amd::Coord3D(0), amd::Coord3D(size));
+    size_t patternSize = ((size % sizeof(pattern)) == 0) ? sizeof(pattern) : 1;
+    prog.dev().xferMgr().fillBuffer(*gpuAccess_, &pattern, patternSize, amd::Coord3D(0),
+                                    amd::Coord3D(size));
   }
 
   switch (segment) {
@@ -237,7 +237,7 @@ inline static std::vector<std::string> splitSpaceSeparatedString(char* str) {
 }
 
 bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_t binSize) {
-#if  defined(WITH_COMPILER_LIB)
+#if defined(WITH_COMPILER_LIB)
   // ACL_TYPE_CG stage is not performed for offline compilation
   hsa_agent_t agent;
   agent.handle = 1;
@@ -262,8 +262,8 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
   }
 
   size_t kernelNamesSize = 0;
-  acl_error errorCode = aclQueryInfo(dev().compiler(), binaryElf_, RT_KERNEL_NAMES,
-    nullptr, nullptr, &kernelNamesSize);
+  acl_error errorCode = aclQueryInfo(dev().compiler(), binaryElf_, RT_KERNEL_NAMES, nullptr,
+                                     nullptr, &kernelNamesSize);
   if (errorCode != ACL_SUCCESS) {
     buildLog_ += "Error: Querying of kernel names size from the binary failed.\n";
     return false;
@@ -274,11 +274,11 @@ bool HSAILProgram::setKernels(amd::option::Options* options, void* binary, size_
                              &kernelNamesSize);
     if (errorCode != ACL_SUCCESS) {
       buildLog_ += "Error: Querying of kernel names from the binary failed.\n";
-      delete [] kernelNames;
+      delete[] kernelNames;
       return false;
     }
     std::vector<std::string> vKernels = splitSpaceSeparatedString(kernelNames);
-    delete [] kernelNames;
+    delete[] kernelNames;
     bool dynamicParallelism = false;
     for (const auto& it : vKernels) {
       std::string kernelName(it);
@@ -338,12 +338,10 @@ bool HSAILProgram::allocKernelTable() {
   return true;
 }
 
-void HSAILProgram::fillResListWithKernels(VirtualGPU& gpu) const {
-  gpu.addVmMemory(&codeSegGpu());
-}
+void HSAILProgram::fillResListWithKernels(VirtualGPU& gpu) const { gpu.addVmMemory(&codeSegGpu()); }
 
 const aclTargetInfo& HSAILProgram::info(const char* str) {
-#if  defined(WITH_COMPILER_LIB)
+#if defined(WITH_COMPILER_LIB)
   acl_error err;
   std::string arch = "hsail";
   if (dev().settings().use64BitPtr_) {
@@ -359,7 +357,7 @@ const aclTargetInfo& HSAILProgram::info(const char* str) {
 }
 
 bool HSAILProgram::saveBinaryAndSetType(type_t type) {
-#if  defined(WITH_COMPILER_LIB)
+#if defined(WITH_COMPILER_LIB)
   // Write binary to memory
   if (rawBinary_ != nullptr) {
     // Free memory containing rawBinary
@@ -378,8 +376,8 @@ bool HSAILProgram::saveBinaryAndSetType(type_t type) {
   return true;
 }
 
-bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr,
-                                      size_t* bytes, const char* global_name) const {
+bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_pptr, size_t* bytes,
+                                      const char* global_name) const {
   uint32_t length = 0;
   size_t offset = 0;
   uint32_t flags = 0;
@@ -456,7 +454,7 @@ bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_p
   }
 
   /* Retrieve the Offset from global pal::Memory created @ segment::alloc */
-  if(!codeSegment_->gpuAddressOffset(reinterpret_cast<uint64_t>(*device_pptr), &offset)) {
+  if (!codeSegment_->gpuAddressOffset(reinterpret_cast<uint64_t>(*device_pptr), &offset)) {
     buildLog_ += "Error: Cannot Retrieve the Address Offset";
     buildLog_ += "\n";
     return false;
@@ -484,13 +482,12 @@ bool HSAILProgram::createGlobalVarObj(amd::Memory** amd_mem_obj, void** device_p
 
 hsa_isa_t PALHSALoaderContext::IsaFromName(const char* name) {
   hsa_isa_t isa = {0};
-  uint32_t gfxip  = 0;
+  uint32_t gfxip = 0;
   std::string gfx_target(name);
   if (gfx_target.find("amdgcn-") == 0) {
     std::string gfxip_version_str = gfx_target.substr(gfx_target.find("gfx") + 3);
     gfxip = std::atoi(gfxip_version_str.c_str());
-  }
-  else {
+  } else {
     // FIXME: Old way. To be remove.
     uint32_t shift = 1;
     size_t last = gfx_target.length();
@@ -508,9 +505,9 @@ hsa_isa_t PALHSALoaderContext::IsaFromName(const char* name) {
 }
 
 bool PALHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa) {
-  uint32_t gfxipVersion = program_->dev().settings().useLightning_ ?
-    program_->dev().hwInfo()->gfxipVersionLC_ :
-    program_->dev().hwInfo()->gfxipVersion_;
+  uint32_t gfxipVersion = program_->dev().settings().useLightning_
+      ? program_->dev().hwInfo()->gfxipVersionLC_
+      : program_->dev().hwInfo()->gfxipVersion_;
   uint32_t majorSrc = gfxipVersion / 10;
   uint32_t minorSrc = gfxipVersion % 10;
 
@@ -519,11 +516,9 @@ bool PALHSALoaderContext::IsaSupportedByAgent(hsa_agent_t agent, hsa_isa_t isa)
 
   if (majorSrc != majorTrg) {
     return false;
-  }
-  else if (minorTrg == minorSrc) {
+  } else if (minorTrg == minorSrc) {
     return true;
-  }
-  else if (minorTrg < minorSrc) {
+  } else if (minorTrg < minorSrc) {
     LogWarning("ISA downgrade for execution!");
     return true;
   }
@@ -708,7 +703,7 @@ static hsa_status_t GetKernelNamesCallback(hsa_executable_t hExec, hsa_executabl
   return HSA_STATUS_SUCCESS;
 }
 
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
 
 bool LightningProgram::createBinary(amd::option::Options* options) {
 #if defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
@@ -716,7 +711,7 @@ bool LightningProgram::createBinary(amd::option::Options* options) {
     LogError("Failed to create ELF binary image!");
     return false;
   }
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
   return true;
 }
 
@@ -752,10 +747,10 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
   }
 
 #if defined(USE_COMGR_LIBRARY)
-  for (const auto &kernelMeta : kernelMetadataMap_) {
+  for (const auto& kernelMeta : kernelMetadataMap_) {
     auto kernelName = kernelMeta.first;
-    auto kernel = new LightningKernel(kernelName, this,
-                                      options->origOptionStr + ProcessOptions(options));
+    auto kernel =
+        new LightningKernel(kernelName, this, options->origOptionStr + ProcessOptions(options));
     kernels()[kernelName] = kernel;
 
     if (!kernel->init()) {
@@ -804,9 +799,9 @@ bool LightningProgram::setKernels(amd::option::Options* options, void* binary, s
     maxScratchRegs_ =
         std::max(static_cast<uint>(kernel->workGroupInfo()->scratchRegs_), maxScratchRegs_);
   }
-#endif // defined(USE_COMGR_LIBRARY)
+#endif  // defined(USE_COMGR_LIBRARY)
   DestroySegmentCpuAccess();
-#endif // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
+#endif  // defined(WITH_LIGHTNING_COMPILER) || defined(USE_COMGR_LIBRARY)
   return true;
 }
 
diff --git a/projects/clr/rocclr/runtime/device/pal/palprogram.hpp b/projects/clr/rocclr/runtime/device/pal/palprogram.hpp
index 32e98aab6f..ddc41c0c1d 100644
--- a/projects/clr/rocclr/runtime/device/pal/palprogram.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palprogram.hpp
@@ -9,15 +9,15 @@
 namespace amd {
 namespace option {
 class Options;
-}  // option
+}  // namespace option
 namespace hsa {
 namespace loader {
 class Loader;
 class Executable;
 class Context;
-}  // loader
-}  // hsa
-}  // amd
+}  // namespace loader
+}  // namespace hsa
+}  // namespace amd
 
 //! \namespace pal PAL Device Implementation
 namespace pal {
@@ -50,15 +50,16 @@ class Segment : public amd::HeapObject {
   bool gpuAddressOffset(uint64_t offAddr, size_t* offset);
 
   //! Returns address for CPU access in the segment
-  void* cpuAddress(size_t offset) const
-    { return ((cpuAccess_ != nullptr) ? cpuAccess_->data() : cpuMem_) + offset; }
+  void* cpuAddress(size_t offset) const {
+    return ((cpuAccess_ != nullptr) ? cpuAccess_->data() : cpuMem_) + offset;
+  }
 
   void DestroyCpuAccess();
 
  private:
-  Memory* gpuAccess_;   //!< GPU memory for segment access
-  Memory* cpuAccess_;   //!< CPU memory for segment (backing store)
-  address cpuMem_;      //!< CPU memory for segment without GPU direct access (backing store)
+  Memory* gpuAccess_;  //!< GPU memory for segment access
+  Memory* cpuAccess_;  //!< CPU memory for segment (backing store)
+  address cpuMem_;     //!< CPU memory for segment without GPU direct access (backing store)
 };
 
 class PALHSALoaderContext final : public Context {
@@ -166,7 +167,7 @@ class HSAILProgram : public device::Program {
   }
 
   //! Get symbol by name
-  amd::hsa::loader::Symbol* GetSymbol(const char* symbol_name, const hsa_agent_t *agent) const {
+  amd::hsa::loader::Symbol* GetSymbol(const char* symbol_name, const hsa_agent_t* agent) const {
     return executable_->GetSymbol(symbol_name, agent);
   }
 
@@ -180,11 +181,14 @@ class HSAILProgram : public device::Program {
   virtual bool setKernels(amd::option::Options* options, void* binary, size_t binSize) override;
 
   //! Destroys CPU allocations in the code segment
-  void DestroySegmentCpuAccess() const
-    { if (codeSegment_ != nullptr) { codeSegment_->DestroyCpuAccess(); } }
+  void DestroySegmentCpuAccess() const {
+    if (codeSegment_ != nullptr) {
+      codeSegment_->DestroyCpuAccess();
+    }
+  }
 
-  virtual bool createGlobalVarObj(amd::Memory** amd_mem_obj, void** dptr,
-                                  size_t* bytes, const char* globalName) const;
+  virtual bool createGlobalVarObj(amd::Memory** amd_mem_obj, void** dptr, size_t* bytes,
+                                  const char* globalName) const;
 
  private:
   //! Disable default copy constructor
@@ -201,7 +205,7 @@ class HSAILProgram : public device::Program {
   std::vector<Memory*> globalStores_;  //!< Global memory for the program
   Memory* kernels_;                    //!< Table with kernel object pointers
   Memory* codeSegGpu_;                 //!< GPU memory with code objects
-  Segment*  codeSegment_;              //!< Pointer to the code segment for this program
+  Segment* codeSegment_;               //!< Pointer to the code segment for this program
   uint
       maxScratchRegs_;  //!< Maximum number of scratch regs used in the program by individual kernel
   std::list<Sampler*> staticSamplers_;  //!< List od internal static samplers
@@ -214,19 +218,17 @@ class HSAILProgram : public device::Program {
 //! \class Lightning Compiler Program
 class LightningProgram : public HSAILProgram {
  public:
-  LightningProgram(NullDevice& device)
-    : HSAILProgram(device) {
-      isLC_ = true;
-      xnackEnabled_ = dev().hwInfo()->xnackEnabled_;
-      machineTarget_ = dev().hwInfo()->machineTargetLC_;
-    }
+  LightningProgram(NullDevice& device) : HSAILProgram(device) {
+    isLC_ = true;
+    xnackEnabled_ = dev().hwInfo()->xnackEnabled_;
+    machineTarget_ = dev().hwInfo()->machineTargetLC_;
+  }
 
-  LightningProgram(Device& device)
-    : HSAILProgram(device) {
-      isLC_ = true;
-      xnackEnabled_ = dev().hwInfo()->xnackEnabled_;
-      machineTarget_ = dev().hwInfo()->machineTargetLC_;
-    }
+  LightningProgram(Device& device) : HSAILProgram(device) {
+    isLC_ = true;
+    xnackEnabled_ = dev().hwInfo()->xnackEnabled_;
+    machineTarget_ = dev().hwInfo()->machineTargetLC_;
+  }
   virtual ~LightningProgram() {}
 
  protected:
@@ -235,4 +237,5 @@ class LightningProgram : public HSAILProgram {
   virtual bool createBinary(amd::option::Options* options) override;
 };
 
-/*@}*/} // namespace pal
+/*@}*/
+}  // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/palresource.cpp b/projects/clr/rocclr/runtime/device/pal/palresource.cpp
index e3a719cc38..088978846b 100644
--- a/projects/clr/rocclr/runtime/device/pal/palresource.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palresource.cpp
@@ -41,8 +41,8 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
   if (memRef != nullptr) {
     result = dev.iDev()->CreateGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_);
     if ((result != Pal::Result::Success) &&
-         // Free cache if PAL failed allocation
-         dev.resourceCache().free()) {
+        // Free cache if PAL failed allocation
+        dev.resourceCache().free()) {
       // If cache was freed, then try to allocate again
       result = dev.iDev()->CreateGpuMemory(createInfo, &memRef[1], &memRef->gpuMem_);
     }
@@ -154,8 +154,7 @@ GpuMemoryReference* GpuMemoryReference::Create(const Device& dev,
 
 // ================================================================================================
 GpuMemoryReference::GpuMemoryReference(const Device& dev)
-  : gpuMem_(nullptr), cpuAddress_(nullptr), device_(dev), gpu_(nullptr)
-{}
+    : gpuMem_(nullptr), cpuAddress_(nullptr), device_(dev), gpu_(nullptr) {}
 
 // ================================================================================================
 GpuMemoryReference::~GpuMemoryReference() {
@@ -181,8 +180,7 @@ GpuMemoryReference::~GpuMemoryReference() {
     iMem()->Unmap();
   }
   if (0 != iMem()) {
-    if (!(iMem()->Desc().flags.isShared ||
-          iMem()->Desc().flags.isExternal ||
+    if (!(iMem()->Desc().flags.isShared || iMem()->Desc().flags.isExternal ||
           iMem()->Desc().flags.isExternPhys)) {
       // Update free memory size counters
       device_.updateAllocedMemory(iMem()->Desc().preferredHeap, iMem()->Desc().size, true);
@@ -368,7 +366,7 @@ void Resource::memTypeToHeap(Pal::GpuMemoryCreateInfo* createInfo) {
     case Persistent:
       createInfo->heapCount = 2;
       createInfo->heaps[0] = Pal::GpuHeapLocal;
-      createInfo->heaps[1] = Pal:: GpuHeapGartUswc;
+      createInfo->heaps[1] = Pal::GpuHeapGartUswc;
 #ifdef ATI_OS_LINUX
       // Note: SSG in Linux requires DGMA heap
       if (dev().properties().gpuMemoryProperties.busAddressableMemSize > 0) {
@@ -401,11 +399,10 @@ void Resource::memTypeToHeap(Pal::GpuMemoryCreateInfo* createInfo) {
 }
 
 // ================================================================================================
-bool Resource::CreateImage(CreateParams* params)
-{
+bool Resource::CreateImage(CreateParams* params) {
   Pal::Result result;
-  Pal::SubresId ImgSubresId = { Pal::ImageAspect::Color, 0, 0 };
-  Pal::SubresRange ImgSubresRange = { ImgSubresId, 1, 1 };
+  Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, 0, 0};
+  Pal::SubresRange ImgSubresRange = {ImgSubresId, 1, 1};
   Pal::ChannelMapping channels;
   Pal::ChNumFormat format = dev().getPalFormat(desc().format_, &channels);
 
@@ -417,8 +414,7 @@ bool Resource::CreateImage(CreateParams* params)
       memRef_->retain();
       desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
       offset_ += viewOwner_->offset_;
-    }
-    else {
+    } else {
       Pal::GpuMemoryCreateInfo createInfo = {};
       createInfo.size = desc().width_ * elementSize();
       createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment);
@@ -427,8 +423,8 @@ bool Resource::CreateImage(CreateParams* params)
       createInfo.priority = Pal::GpuMemPriority::Normal;
       memTypeToHeap(&createInfo);
       // createInfo.priority;
-      memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size,
-        createInfo.alignment, nullptr, &subOffset_);
+      memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment,
+                                                    nullptr, &subOffset_);
       if (nullptr == memRef_) {
         memRef_ = GpuMemoryReference::Create(dev(), createInfo);
         if (nullptr == memRef_) {
@@ -477,16 +473,16 @@ bool Resource::CreateImage(CreateParams* params)
   imgCreateInfo.arraySize = 1;
 
   switch (desc_.topology_) {
-  case CL_MEM_OBJECT_IMAGE3D:
-    imgCreateInfo.imageType = Pal::ImageType::Tex3d;
-    viewInfo.viewType = Pal::ImageViewType::Tex3d;
-    break;
-  case CL_MEM_OBJECT_IMAGE1D:
-  case CL_MEM_OBJECT_IMAGE1D_ARRAY:
-  case CL_MEM_OBJECT_IMAGE1D_BUFFER:
-    imgCreateInfo.imageType = Pal::ImageType::Tex1d;
-    viewInfo.viewType = Pal::ImageViewType::Tex1d;
-    break;
+    case CL_MEM_OBJECT_IMAGE3D:
+      imgCreateInfo.imageType = Pal::ImageType::Tex3d;
+      viewInfo.viewType = Pal::ImageViewType::Tex3d;
+      break;
+    case CL_MEM_OBJECT_IMAGE1D:
+    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+    case CL_MEM_OBJECT_IMAGE1D_BUFFER:
+      imgCreateInfo.imageType = Pal::ImageType::Tex1d;
+      viewInfo.viewType = Pal::ImageViewType::Tex1d;
+      break;
   }
   if (desc_.topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
     ImgSubresRange.numSlices = imgCreateInfo.arraySize = desc_.height_;
@@ -504,8 +500,7 @@ bool Resource::CreateImage(CreateParams* params)
     ImgSubresRange.startSubres.arraySlice = imageView->layer_;
     viewOwner_ = imageView->resource_;
     image_ = viewOwner_->image_;
-  }
-  else if (memoryType() == ImageBuffer) {
+  } else if (memoryType() == ImageBuffer) {
     ImageBufferParams* imageBuffer = reinterpret_cast<ImageBufferParams*>(params);
     viewOwner_ = imageBuffer->resource_;
   }
@@ -515,11 +510,11 @@ bool Resource::CreateImage(CreateParams* params)
   ImgSubresRange.numMips = desc().mipLevels_;
 
   if ((memoryType() != ImageView) ||
-    //! @todo PAL doesn't allow an SRD view creation with different pixel size
-    (elementSize() != viewOwner_->elementSize())) {
+      //! @todo PAL doesn't allow an SRD view creation with different pixel size
+      (elementSize() != viewOwner_->elementSize())) {
     imgCreateInfo.usageFlags.shaderRead = true;
     imgCreateInfo.usageFlags.shaderWrite =
-      (format == Pal::ChNumFormat::X8Y8Z8W8_Srgb) ? false : true;
+        (format == Pal::ChNumFormat::X8Y8Z8W8_Srgb) ? false : true;
     imgCreateInfo.swizzledFormat.format = format;
     imgCreateInfo.swizzledFormat.swizzle = channels;
     imgCreateInfo.mipLevels = (desc_.mipLevels_) ? desc_.mipLevels_ : 1;
@@ -529,10 +524,9 @@ bool Resource::CreateImage(CreateParams* params)
     uint32_t rowPitch = 0;
 
     if (((memoryType() == Persistent) && dev().settings().linearPersistentImage_) ||
-      (memoryType() == ImageBuffer)) {
+        (memoryType() == ImageBuffer)) {
       tiling = Pal::ImageTiling::Linear;
-    }
-    else if (memoryType() == ImageView) {
+    } else if (memoryType() == ImageView) {
       tiling = viewOwner_->image_->GetImageCreateInfo().tiling;
       // Find the new pitch in pixels for the new format
       rowPitch = viewOwner_->desc().pitch_ * viewOwner_->elementSize() / elementSize();
@@ -540,10 +534,9 @@ bool Resource::CreateImage(CreateParams* params)
 
     if (memoryType() == ImageBuffer) {
       if ((params->owner_ != NULL) && params->owner_->asImage() &&
-        (params->owner_->asImage()->getRowPitch() != 0)) {
+          (params->owner_->asImage()->getRowPitch() != 0)) {
         rowPitch = params->owner_->asImage()->getRowPitch() / elementSize();
-      }
-      else {
+      } else {
         rowPitch = desc().width_;
       }
     }
@@ -579,8 +572,8 @@ bool Resource::CreateImage(CreateParams* params)
     createInfo.priority = Pal::GpuMemPriority::Normal;
     memTypeToHeap(&createInfo);
 
-    memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size,
-      createInfo.alignment, nullptr, &subOffset_);
+    memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment,
+                                                  nullptr, &subOffset_);
     if (nullptr == memRef_) {
       memRef_ = GpuMemoryReference::Create(dev(), createInfo);
       if (nullptr == memRef_) {
@@ -589,8 +582,7 @@ bool Resource::CreateImage(CreateParams* params)
       }
     }
     offset_ += static_cast<size_t>(subOffset_);
-  }
-  else {
+  } else {
     memRef_ = viewOwner_->memRef_;
     memRef_->retain();
     desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
@@ -627,11 +619,10 @@ bool Resource::CreateImage(CreateParams* params)
 }
 
 // ================================================================================================
-bool Resource::CreateInterop(CreateParams* params)
-{
+bool Resource::CreateInterop(CreateParams* params) {
   Pal::Result result;
-  Pal::SubresId ImgSubresId = { Pal::ImageAspect::Color, 0, 0 };
-  Pal::SubresRange ImgSubresRange = { ImgSubresId, 1, 1 };
+  Pal::SubresId ImgSubresId = {Pal::ImageAspect::Color, 0, 0};
+  Pal::SubresRange ImgSubresRange = {ImgSubresId, 1, 1};
   Pal::ChannelMapping channels;
   Pal::ChNumFormat format = dev().getPalFormat(desc().format_, &channels);
   Pal::ExternalGpuMemoryOpenInfo gpuMemOpenInfo = {};
@@ -645,21 +636,21 @@ bool Resource::CreateInterop(CreateParams* params)
     OGLInteropParams* oglRes = reinterpret_cast<OGLInteropParams*>(params);
     assert(oglRes->glPlatformContext_ && "We don't have OGL context!");
     switch (oglRes->type_) {
-    case InteropVertexBuffer:
-      glType_ = GL_RESOURCE_ATTACH_VERTEXBUFFER_AMD;
-      break;
-    case InteropRenderBuffer:
-      glType_ = GL_RESOURCE_ATTACH_RENDERBUFFER_AMD;
-      break;
-    case InteropTexture:
-    case InteropTextureViewLevel:
-    case InteropTextureViewCube:
-      glType_ = GL_RESOURCE_ATTACH_TEXTURE_AMD;
-      break;
-    default:
-      LogError("Unknown OGL interop type!");
-      return false;
-      break;
+      case InteropVertexBuffer:
+        glType_ = GL_RESOURCE_ATTACH_VERTEXBUFFER_AMD;
+        break;
+      case InteropRenderBuffer:
+        glType_ = GL_RESOURCE_ATTACH_RENDERBUFFER_AMD;
+        break;
+      case InteropTexture:
+      case InteropTextureViewLevel:
+      case InteropTextureViewCube:
+        glType_ = GL_RESOURCE_ATTACH_TEXTURE_AMD;
+        break;
+      default:
+        LogError("Unknown OGL interop type!");
+        return false;
+        break;
     }
     glPlatformContext_ = oglRes->glPlatformContext_;
     layer = oglRes->layer_;
@@ -667,17 +658,18 @@ bool Resource::CreateInterop(CreateParams* params)
     mipLevel = oglRes->mipLevel_;
 
     if (!dev().resGLAssociate(oglRes->glPlatformContext_, oglRes->handle_, glType_,
-      &openInfo.hExternalResource, &glInteropMbRes_, &offset_, desc_.format_
+                              &openInfo.hExternalResource, &glInteropMbRes_, &offset_, desc_.format_
 #ifdef ATI_OS_WIN
-      , openInfo.doppDesktopInfo
+                              ,
+                              openInfo.doppDesktopInfo
 #endif
-    )) {
+                              )) {
       return false;
     }
     desc_.isDoppTexture_ = (openInfo.doppDesktopInfo.gpuVirtAddr != 0);
     format = dev().getPalFormat(desc().format_, &channels);
   }
-#ifdef ATI_OS_WIN	
+#ifdef ATI_OS_WIN
   else {
     D3DInteropParams* d3dRes = reinterpret_cast<D3DInteropParams*>(params);
     openInfo.hExternalResource = d3dRes->handle_;
@@ -713,8 +705,8 @@ bool Resource::CreateInterop(CreateParams* params)
       size_t gpuMemSize;
 
       if (Pal::Result::Success !=
-        dev().iDev()->GetExternalSharedImageSizes(imgOpenInfo, &imageSize, &gpuMemSize,
-          &imgCreateInfo)) {
+          dev().iDev()->GetExternalSharedImageSizes(imgOpenInfo, &imageSize, &gpuMemSize,
+                                                    &imgCreateInfo)) {
         return false;
       }
 
@@ -736,51 +728,51 @@ bool Resource::CreateInterop(CreateParams* params)
       imgCreateInfo.depthPitch = desc().height_ * imgCreateInfo.rowPitch;
 
       switch (misc) {
-      case 1:  // NV12 or P010 formats
-        switch (layer) {
-        case -1:
-        case 0:
+        case 1:  // NV12 or P010 formats
+          switch (layer) {
+            case -1:
+            case 0:
+              break;
+            case 1:
+              // Y - plane size to the offset
+              // NV12 format. UV plane is 2 times smaller than plane Y
+              viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
+              imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
+              break;
+            default:
+              LogError("Unknown Interop View Type");
+              return false;
+          }
           break;
-        case 1:
-          // Y - plane size to the offset
-          // NV12 format. UV is 2 times smaller plane Y
-          viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
+        case 2:  // YV12 format
+          switch (layer) {
+            case -1:
+            case 0:
+              break;
+            case 1:
+              // Y - plane size to the offset
+              // YV12 format. U is 4 times smaller plane than Y
+              viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
+              imgCreateInfo.rowPitch >>= 1;
+              break;
+            case 2:
+              // Y + U plane sizes to the offset.
+              // U plane is 4 times smaller than Y and U == V
+              viewOffset = 5 * imgCreateInfo.rowPitch * desc().height_ / 2;
+              imgCreateInfo.rowPitch >>= 1;
+              break;
+            default:
+              LogError("Unknown Interop View Type");
+              return false;
+          }
+          imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
+          break;
+        case 3:  // YUY2 format
           imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
           break;
         default:
           LogError("Unknown Interop View Type");
           return false;
-        }
-        break;
-      case 2:  // YV12 format
-        switch (layer) {
-        case -1:
-        case 0:
-          break;
-        case 1:
-          // Y - plane size to the offset
-          // YV12 format. U is 4 times smaller plane than Y
-          viewOffset = 2 * imgCreateInfo.rowPitch * desc().height_;
-          imgCreateInfo.rowPitch >>= 1;
-          break;
-        case 2:
-          // Y + U plane sizes to the offest.
-          // U plane is 4 times smaller than Y and U == V
-          viewOffset = 5 * imgCreateInfo.rowPitch * desc().height_ / 2;
-          imgCreateInfo.rowPitch >>= 1;
-          break;
-        default:
-          LogError("Unknown Interop View Type");
-          return false;
-        }
-        imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
-        break;
-      case 3:  // YUY2 format
-        imgCreateInfo.depthPitch = imgCreateInfo.rowPitch * desc().height_;
-        break;
-      default:
-        LogError("Unknown Interop View Type");
-        return false;
       }
 
       imageSize = dev().iDev()->GetImageSize(imgCreateInfo, &result);
@@ -820,8 +812,7 @@ bool Resource::CreateInterop(CreateParams* params)
       hwState_[10] = static_cast<uint32_t>(desc().width_);
       hwState_[11] = 0;  // one extra reserved field in the argument
     }
-  }
-  else if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+  } else if (desc().topology_ == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
     memRef_ = GpuMemoryReference::Create(dev(), gpuMemOpenInfo);
     if (nullptr == memRef_) {
       return false;
@@ -842,8 +833,7 @@ bool Resource::CreateInterop(CreateParams* params)
     hwState_[9] = GetHSAILImageOrderType(desc().format_);
     hwState_[10] = static_cast<uint32_t>(desc().width_);
     hwState_[11] = 0;  // one extra reserved field in the argument
-  }
-  else {
+  } else {
     Pal::ExternalImageOpenInfo imgOpenInfo = {};
     Pal::ImageCreateInfo imgCreateInfo = {};
     imgOpenInfo.resourceInfo = openInfo;
@@ -865,14 +855,14 @@ bool Resource::CreateInterop(CreateParams* params)
     viewInfo.possibleLayouts.usages = Pal::LayoutShaderWrite;
     viewInfo.viewType = Pal::ImageViewType::Tex2d;
     switch (imgCreateInfo.imageType) {
-    case Pal::ImageType::Tex3d:
-      viewInfo.viewType = Pal::ImageViewType::Tex3d;
-      break;
-    case Pal::ImageType::Tex1d:
-      viewInfo.viewType = Pal::ImageViewType::Tex1d;
-      break;
-    default:
-      break;
+      case Pal::ImageType::Tex3d:
+        viewInfo.viewType = Pal::ImageViewType::Tex3d;
+        break;
+      case Pal::ImageType::Tex1d:
+        viewInfo.viewType = Pal::ImageViewType::Tex1d;
+        break;
+      default:
+        break;
     }
     viewInfo.pImage = image_;
     viewInfo.swizzledFormat.format = format;
@@ -897,14 +887,13 @@ bool Resource::CreateInterop(CreateParams* params)
     //! It's a workaround for D24S8 format, since PAL doesn't support this format
     //! and GSL decompresses 24bit DEPTH into D24S8 for OGL compatibility
     if ((desc().format_.image_channel_order == CL_DEPTH_STENCIL) &&
-      (desc().format_.image_channel_data_type == CL_UNORM_INT24)) {
-        if (dev().settings().gfx10Plus_) {
-          hwState_[1] = (hwState_[1] & ~0x1ff00000) | 0x08d00000;
-        }
-        else {
-          hwState_[1] &= ~0x3c000000;
-          hwState_[1] = (hwState_[1] & ~0x3f00000) | 0x1400000;
-        }
+        (desc().format_.image_channel_data_type == CL_UNORM_INT24)) {
+      if (dev().settings().gfx10Plus_) {
+        hwState_[1] = (hwState_[1] & ~0x1ff00000) | 0x08d00000;
+      } else {
+        hwState_[1] &= ~0x3c000000;
+        hwState_[1] = (hwState_[1] & ~0x3f00000) | 0x1400000;
+      }
     }
     hwState_[8] = GetHSAILImageFormatType(desc().format_);
     hwState_[9] = GetHSAILImageOrderType(desc().format_);
@@ -915,8 +904,7 @@ bool Resource::CreateInterop(CreateParams* params)
 }
 
 // ================================================================================================
-bool Resource::CreatePinned(CreateParams* params)
-{
+bool Resource::CreatePinned(CreateParams* params) {
   PinnedParams* pinned = reinterpret_cast<PinnedParams*>(params);
   size_t allocSize = pinned->size_;
   const amd::HostMemoryReference* hostMemRef = pinned->hostMemRef_;
@@ -926,7 +914,7 @@ bool Resource::CreatePinned(CreateParams* params)
   if (desc().topology_ == CL_MEM_OBJECT_BUFFER) {
     // Allign offset to 4K boundary (Vista/Win7 limitation)
     char* tmpHost = const_cast<char*>(
-      amd::alignDown(reinterpret_cast<const char*>(address_), PinnedMemoryAlignment));
+        amd::alignDown(reinterpret_cast<const char*>(address_), PinnedMemoryAlignment));
 
     // Find the partial size for unaligned copy
     hostMemOffset = static_cast<uint>(reinterpret_cast<const char*>(address_) - tmpHost);
@@ -940,18 +928,16 @@ bool Resource::CreatePinned(CreateParams* params)
     }
     allocSize = amd::alignUp(allocSize, PinnedMemoryAlignment);
     //            hostMemOffset &= ~(0xff);
-  }
-  else if (desc().topology_ == CL_MEM_OBJECT_IMAGE2D) {
+  } else if (desc().topology_ == CL_MEM_OBJECT_IMAGE2D) {
     //! @todo: Width has to be aligned for 3D.
     //! Need to be replaced with a compute copy
     // Width aligned by 8 texels
     if (((desc().width_ % 0x8) != 0) ||
-      // Pitch aligned by 64 bytes
-      (((desc().width_ * elementSize()) % 0x40) != 0)) {
+        // Pitch aligned by 64 bytes
+        (((desc().width_ * elementSize()) % 0x40) != 0)) {
       return false;
     }
-  }
-  else {
+  } else {
     //! @todo GSL doesn't support pinning with resAlloc_
     return false;
   }
@@ -978,8 +964,7 @@ bool Resource::CreatePinned(CreateParams* params)
 }
 
 // ================================================================================================
-bool Resource::CreateSvm(CreateParams* params, Pal::gpusize svmPtr)
-{
+bool Resource::CreateSvm(CreateParams* params, Pal::gpusize svmPtr) {
   const bool isFineGrain = (memoryType() == RemoteUSWC) || (memoryType() == Remote);
   size_t allocSize = amd::alignUp(desc().width_ * elementSize_,
                                   dev().properties().gpuMemoryProperties.fragmentSize);
@@ -991,20 +976,18 @@ bool Resource::CreateSvm(CreateParams* params, Pal::gpusize svmPtr)
     if (svmPtr != 0) {
       createInfo.flags.useReservedGpuVa = true;
       createInfo.pReservedGpuVaOwner = params->svmBase_->iMem();
-    }
-    else {
+    } else {
       createInfo.flags.useReservedGpuVa = false;
       createInfo.pReservedGpuVaOwner = nullptr;
     }
     if (!dev().settings().svmFineGrainSystem_) {
-      memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size,
-        createInfo.alignment, createInfo.pReservedGpuVaOwner, &subOffset_);
+      memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment,
+                                                    createInfo.pReservedGpuVaOwner, &subOffset_);
     }
     if (memRef_ == nullptr) {
       memRef_ = GpuMemoryReference::Create(dev(), createInfo);
     }
-  }
-  else {
+  } else {
     Pal::GpuMemoryCreateInfo createInfo = {};
     createInfo.size = allocSize;
     createInfo.alignment = MaxGpuAlignment;
@@ -1015,8 +998,8 @@ bool Resource::CreateSvm(CreateParams* params, Pal::gpusize svmPtr)
       createInfo.pReservedGpuVaOwner = params->svmBase_->iMem();
     }
     memTypeToHeap(&createInfo);
-    memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size,
-      createInfo.alignment, createInfo.pReservedGpuVaOwner, &subOffset_);
+    memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment,
+                                                  createInfo.pReservedGpuVaOwner, &subOffset_);
     if (memRef_ == nullptr) {
       createInfo.alignment = dev().properties().gpuMemoryProperties.fragmentSize;
       memRef_ = GpuMemoryReference::Create(dev(), createInfo);
@@ -1028,9 +1011,9 @@ bool Resource::CreateSvm(CreateParams* params, Pal::gpusize svmPtr)
   }
   desc_.cardMemory_ = false;
   if ((nullptr != params) && (nullptr != params->owner_) &&
-    (nullptr != params->owner_->getSvmPtr())) {
+      (nullptr != params->owner_->getSvmPtr())) {
     params->owner_->setSvmPtr(
-      reinterpret_cast<void*>(memRef_->iMem()->Desc().gpuVirtAddr + subOffset_));
+        reinterpret_cast<void*>(memRef_->iMem()->Desc().gpuVirtAddr + subOffset_));
     offset_ += static_cast<size_t>(subOffset_);
   }
   return true;
@@ -1126,18 +1109,18 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
   Pal::gpusize svmPtr = 0;
   if ((nullptr != params) && (nullptr != params->owner_) &&
       (nullptr != params->owner_->getSvmPtr())) {
-      svmPtr = reinterpret_cast<Pal::gpusize>(params->owner_->getSvmPtr());
-      desc_.SVMRes_ = true;
-      svmPtr = (svmPtr == 1) ? 0 : svmPtr;
+    svmPtr = reinterpret_cast<Pal::gpusize>(params->owner_->getSvmPtr());
+    desc_.SVMRes_ = true;
+    svmPtr = (svmPtr == 1) ? 0 : svmPtr;
   }
   if (desc_.SVMRes_) {
-      return CreateSvm(params, svmPtr);
+    return CreateSvm(params, svmPtr);
   }
 
   Pal::GpuMemoryCreateInfo createInfo = {};
   createInfo.size = desc().width_ * elementSize_;
   createInfo.size = amd::alignUp(createInfo.size, MaxGpuAlignment);
-  createInfo.alignment = desc().scratch_ ? 64*Ki : MaxGpuAlignment;
+  createInfo.alignment = desc().scratch_ ? 64 * Ki : MaxGpuAlignment;
   createInfo.vaRange = Pal::VaRange::Default;
   createInfo.priority = Pal::GpuMemPriority::Normal;
 
@@ -1152,8 +1135,8 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
 
   memTypeToHeap(&createInfo);
   // createInfo.priority;
-  memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size,
-    createInfo.alignment, nullptr, &subOffset_);
+  memRef_ = dev().resourceCache().findGpuMemory(&desc_, createInfo.size, createInfo.alignment,
+                                                nullptr, &subOffset_);
   if (nullptr == memRef_) {
     memRef_ = GpuMemoryReference::Create(dev(), createInfo);
     if (nullptr == memRef_) {
@@ -1172,14 +1155,13 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
 }
 
 // ================================================================================================
-void Resource::free()
-{
+void Resource::free() {
   if (memRef_ == nullptr) {
     return;
   }
 
   const bool wait =
-    (memoryType() != ImageView) && (memoryType() != ImageBuffer) && (memoryType() != View);
+      (memoryType() != ImageView) && (memoryType() != ImageBuffer) && (memoryType() != View);
 
   // OCL has to wait, even if resource is placed in the cache, since reallocation can occur
   // and resource can be reused on another async queue without a wait on a busy operation
@@ -1190,8 +1172,7 @@ void Resource::free()
       for (uint idx = 1; idx < dev().vgpus().size(); ++idx) {
         dev().vgpus()[idx]->waitForEvent(&events_[idx]);
       }
-    }
-    else {
+    } else {
       amd::ScopedLock l(memRef_->gpu_->execution());
       memRef_->gpu_->waitForEvent(&events_[memRef_->gpu_->index()]);
     }
@@ -1232,8 +1213,7 @@ void Resource::free()
 
 // ================================================================================================
 void Resource::writeRawData(VirtualGPU& gpu, size_t offset, size_t size, const void* data,
-                            bool waitForEvent) const
-{
+                            bool waitForEvent) const {
   GpuEvent event;
 
   // Write data size bytes to surface
@@ -1242,7 +1222,7 @@ void Resource::writeRawData(VirtualGPU& gpu, size_t offset, size_t size, const v
   gpu.eventBegin(MainEngine);
   gpu.queue(MainEngine).addCmdMemRef(memRef());
   gpu.iCmd()->CmdUpdateMemory(*iMem(), offset_ + offset, size,
-    reinterpret_cast<const uint32_t*>(data));
+                              reinterpret_cast<const uint32_t*>(data));
   gpu.eventEnd(MainEngine, event);
 
   if (waitForEvent) {
@@ -1259,8 +1239,7 @@ void Resource::writeRawData(VirtualGPU& gpu, size_t offset, size_t size, const v
 }
 
 // ================================================================================================
-static const Pal::ChNumFormat ChannelFmt(uint bytesPerElement)
-{
+static const Pal::ChNumFormat ChannelFmt(uint bytesPerElement) {
   if (bytesPerElement == 16) {
     return Pal::ChNumFormat::X32Y32Z32W32_Uint;
   } else if (bytesPerElement == 8) {
@@ -1292,8 +1271,7 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
   if (desc().buffer_ && !dstResource.desc().buffer_) {
     imageOffsetx = dstOrigin[0] % dstResource.elementSize();
     gpuMemoryOffset = srcOrigin[0] + offset();
-    gpuMemoryRowPitch =
-        (srcOrigin[1]) ? srcOrigin[1] : size[0] * dstResource.elementSize();
+    gpuMemoryRowPitch = (srcOrigin[1]) ? srcOrigin[1] : size[0] * dstResource.elementSize();
     img1Darray = (dstResource.desc().topology_ == CL_MEM_OBJECT_IMAGE1D_ARRAY);
     img2Darray = (dstResource.desc().topology_ == CL_MEM_OBJECT_IMAGE2D_ARRAY);
   } else if (!desc().buffer_ && dstResource.desc().buffer_) {
@@ -1374,7 +1352,8 @@ bool Resource::partialMemCopyTo(VirtualGPU& gpu, const amd::Coord3D& srcOrigin,
     }
     copyRegion.gpuMemoryOffset = gpuMemoryOffset;
     copyRegion.gpuMemoryRowPitch = gpuMemoryRowPitch;
-    copyRegion.gpuMemoryDepthPitch = (dstOrigin[2]) ? dstOrigin[2]
+    copyRegion.gpuMemoryDepthPitch = (dstOrigin[2])
+        ? dstOrigin[2]
         : copyRegion.gpuMemoryRowPitch * copyRegion.imageExtent.height;
     gpu.iCmd()->CmdCopyImageToMemory(*image_, imgLayout, *dstResource.iMem(), 1, &copyRegion);
   } else {
@@ -1819,17 +1798,14 @@ void Resource::unmap(VirtualGPU* gpu) {
 }
 
 // ================================================================================================
-void Resource::unmapLayers(VirtualGPU* gpu) {
-  Unimplemented();
-}
+void Resource::unmapLayers(VirtualGPU* gpu) { Unimplemented(); }
 
 // ================================================================================================
 bool MemorySubAllocator::InitAllocator(GpuMemoryReference* mem_ref) {
-  MemBuddyAllocator* allocator = new MemBuddyAllocator(
-    device_, device_->settings().subAllocationChunkSize_,
-    device_->settings().subAllocationMinSize_);
-  if (!((allocator != nullptr) &&
-        (allocator->Init() == Pal::Result::Success) &&
+  MemBuddyAllocator* allocator =
+      new MemBuddyAllocator(device_, device_->settings().subAllocationChunkSize_,
+                            device_->settings().subAllocationMinSize_);
+  if (!((allocator != nullptr) && (allocator->Init() == Pal::Result::Success) &&
         heaps_.insert({mem_ref, allocator}).second)) {
     mem_ref->release();
     delete allocator;
@@ -1890,8 +1866,7 @@ bool FineMemorySubAllocator::CreateChunk(const Pal::IGpuMemory* reserved_va) {
 }
 
 // ================================================================================================
-MemorySubAllocator::~MemorySubAllocator()
-{
+MemorySubAllocator::~MemorySubAllocator() {
   // Release memory heap for suballocations
   for (const auto& it : heaps_) {
     it.first->release();
@@ -1901,8 +1876,8 @@ MemorySubAllocator::~MemorySubAllocator()
 
 // ================================================================================================
 GpuMemoryReference* MemorySubAllocator::Allocate(Pal::gpusize size, Pal::gpusize alignment,
-  const Pal::IGpuMemory* reserved_va, Pal::gpusize* offset)
-{
+                                                 const Pal::IGpuMemory* reserved_va,
+                                                 Pal::gpusize* offset) {
   GpuMemoryReference* mem_ref = nullptr;
   MemBuddyAllocator* allocator = nullptr;
   // Check if the resource size and alignment are allowed for suballocation
@@ -1927,7 +1902,7 @@ GpuMemoryReference* MemorySubAllocator::Allocate(Pal::gpusize size, Pal::gpusize
       }
       // We didn't find a valid chunk, so create a new one
       if (!CreateChunk(reserved_va)) {
-          return nullptr;
+        return nullptr;
       }
       i++;
     } while (i < 2);
@@ -1936,8 +1911,7 @@ GpuMemoryReference* MemorySubAllocator::Allocate(Pal::gpusize size, Pal::gpusize
 }
 
 // ================================================================================================
-bool MemorySubAllocator::Free(amd::Monitor* monitor, GpuMemoryReference* ref, Pal::gpusize offset)
-{
+bool MemorySubAllocator::Free(amd::Monitor* monitor, GpuMemoryReference* ref, Pal::gpusize offset) {
   bool release_mem = false;
   {
     amd::ScopedLock l(monitor);
@@ -1966,9 +1940,8 @@ ResourceCache::~ResourceCache() { free(); }
 
 // ================================================================================================
 //! \note the cache works in FILO mode
-bool ResourceCache::addGpuMemory(Resource::Descriptor* desc,
-  GpuMemoryReference* ref, Pal::gpusize offset)
-{
+bool ResourceCache::addGpuMemory(Resource::Descriptor* desc, GpuMemoryReference* ref,
+                                 Pal::gpusize offset) {
   bool result = false;
   size_t size = ref->iMem()->Desc().size;
 
@@ -2017,7 +1990,9 @@ bool ResourceCache::addGpuMemory(Resource::Descriptor* desc,
 
 // ================================================================================================
 GpuMemoryReference* ResourceCache::findGpuMemory(Resource::Descriptor* desc, Pal::gpusize size,
-  Pal::gpusize alignment, const Pal::IGpuMemory* reserved_va, Pal::gpusize* offset) {
+                                                 Pal::gpusize alignment,
+                                                 const Pal::IGpuMemory* reserved_va,
+                                                 Pal::gpusize* offset) {
   amd::ScopedLock l(&lockCacheOps_);
   GpuMemoryReference* ref = nullptr;
 
@@ -2051,7 +2026,7 @@ GpuMemoryReference* ResourceCache::findGpuMemory(Resource::Descriptor* desc, Pal
       ref = it.second;
       cacheSize_ -= sizeRes;
       if (entry->type_ == Resource::Local) {
-          lclCacheSize_ -= sizeRes;
+        lclCacheSize_ -= sizeRes;
       }
       delete it.first;
       // Remove the found etry from the cache
@@ -2078,8 +2053,7 @@ bool ResourceCache::free(size_t minCacheEntries) {
 }
 
 // ================================================================================================
-void ResourceCache::removeLast()
-{
+void ResourceCache::removeLast() {
   std::pair<Resource::Descriptor*, GpuMemoryReference*> entry;
   {
     // Protect access to the global data
diff --git a/projects/clr/rocclr/runtime/device/pal/palresource.hpp b/projects/clr/rocclr/runtime/device/pal/palresource.hpp
index 9b4c63f24a..c2fb0bcad0 100644
--- a/projects/clr/rocclr/runtime/device/pal/palresource.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palresource.hpp
@@ -41,11 +41,11 @@ class GpuMemoryReference : public amd::ReferenceCountedObject {
   //! Get PAL memory object
   Pal::IGpuMemory* iMem() const { return gpuMem_; }
 
-  Pal::IGpuMemory* gpuMem_;   //!< PAL GPU memory object
-  void* cpuAddress_;          //!< CPU address of this memory
-  const Device& device_;      //!< GPU device
+  Pal::IGpuMemory* gpuMem_;  //!< PAL GPU memory object
+  void* cpuAddress_;         //!< CPU address of this memory
+  const Device& device_;     //!< GPU device
   //! @note: This field is necessary for the thread safe release only
-  VirtualGPU* gpu_;           //!< Resource will be used only on this queue
+  VirtualGPU* gpu_;  //!< Resource will be used only on this queue
 
  protected:
   //! Default destructor
@@ -186,7 +186,7 @@ class Resource : public amd::HeapObject {
   //! Constructor of 1D Resource object
   Resource(const Device& gpuDev,  //!< GPU device object
            size_t size            //!< Resource size
-           );
+  );
 
   //! Constructor of Image Resource object
   Resource(const Device& gpuDev,          //!< GPU device object
@@ -196,7 +196,7 @@ class Resource : public amd::HeapObject {
            cl_image_format format,        //!< resource format
            cl_mem_object_type imageType,  //!< CL image type
            uint mipLevels = 1             //!< Number of mip levels
-           );
+  );
 
   //! Destructor of the resource
   virtual ~Resource();
@@ -207,7 +207,7 @@ class Resource : public amd::HeapObject {
    */
   virtual bool create(MemoryType memType,       //!< memory type
                       CreateParams* params = 0  //!< special parameters for resource allocation
-                      );
+  );
 
   /*! \brief Copies a subregion of memory from one resource to another
    *
@@ -253,14 +253,13 @@ class Resource : public amd::HeapObject {
   Pal::IGpuMemory* iMem() const { return memRef_->iMem(); }
 
   //! Returns a pointer to the memory reference
-  GpuMemoryReference* memRef() const {return memRef_; }
+  GpuMemoryReference* memRef() const { return memRef_; }
 
   //! Returns global memory offset
   uint64_t vmAddress() const { return iMem()->Desc().gpuVirtAddr + offset_; }
 
   //! Returns global memory offset
-  uint64_t vmSize() const
-    { return desc_.width_ * desc_.height_ * desc_.depth_ * elementSize(); }
+  uint64_t vmSize() const { return desc_.width_ * desc_.height_ * desc_.depth_ * elementSize(); }
 
   //! Returns global memory offset
   bool mipMapped() const { return (desc().mipLevels_ > 1) ? true : false; }
@@ -279,11 +278,11 @@ class Resource : public amd::HeapObject {
             // Optimization for multilayer map/unmap
             uint startLayer = 0,  //!< Start layer for multilayer map
             uint numLayers = 0    //!< End layer for multilayer map
-            );
+  );
 
   //! Unlocks the resource if it was locked
   void unmap(VirtualGPU* gpu  //!< Virtual GPU device object
-             );
+  );
 
   //! Marks the resource as busy
   void setBusy(VirtualGPU& gpu,   //!< Virtual GPU device object
@@ -303,7 +302,7 @@ class Resource : public amd::HeapObject {
                  uint flags = 0,              //!< Map flags
                  size_t rowPitch = 0,         //!< Raw data row pitch
                  size_t slicePitch = 0        //!< Raw data slice pitch
-                 );
+  );
 
   //! Performs host read from the resource GPU memory
   bool hostRead(VirtualGPU* gpu,             //!< Virtual GPU device object
@@ -312,7 +311,7 @@ class Resource : public amd::HeapObject {
                 const amd::Coord3D& size,    //!< The number of bytes to write
                 size_t rowPitch = 0,         //!< Raw data row pitch
                 size_t slicePitch = 0        //!< Raw data slice pitch
-                );
+  );
 
   //! Gets the resource element size
   uint elementSize() const { return elementSize_; }
@@ -377,7 +376,7 @@ class Resource : public amd::HeapObject {
       memRef_ = viewOwner_->memRef_;
       memRef_->retain();
       desc_.width_ = amd::alignUp(size, Pal::Formats::BytesPerPixel(Pal::ChNumFormat::X32_Uint)) /
-        Pal::Formats::BytesPerPixel(Pal::ChNumFormat::X32_Uint);
+          Pal::Formats::BytesPerPixel(Pal::ChNumFormat::X32_Uint);
       setBusy(*memRef()->gpu_, GpuEvent::InvalidID);
     }
   }
@@ -390,33 +389,32 @@ class Resource : public amd::HeapObject {
 
  protected:
   /*! \brief Creates a PAL iamge object, associated with the resource
-  *
-  *  \return True if we succesfully created a PAL resource
-  */
-  bool CreateImage(CreateParams* params //!< special parameters for resource allocation
-                   );
+   *
+   *  \return True if we successfully created a PAL resource
+   */
+  bool CreateImage(CreateParams* params  //!< special parameters for resource allocation
+  );
 
   /*! \brief Creates a PAL interop object, associated with the resource
-  *
-  *  \return True if we succesfully created a PAL interop resource
-  */
-  bool CreateInterop(CreateParams* params //!< special parameters for resource allocation
-                     );
+   *
+   *  \return True if we successfully created a PAL interop resource
+   */
+  bool CreateInterop(CreateParams* params  //!< special parameters for resource allocation
+  );
 
   /*! \brief Creates a PAL pinned object, associated with the resource
-  *
-  *  \return True if we succesfully created a PAL pinned resource
-  */
-  bool CreatePinned(CreateParams* params //!< special parameters for resource allocation
-                    );
+   *
+   *  \return True if we successfully created a PAL pinned resource
+   */
+  bool CreatePinned(CreateParams* params  //!< special parameters for resource allocation
+  );
 
   /*! \brief Creates a PAL SVM object, associated with the resource
-  *
+   *  \return True if we successfully created a PAL SVM resource
-  */
+   *
+   *  \return True if we succesfully created a PAL SVM resource
+   */
   bool CreateSvm(CreateParams* params,  //!< special parameters for resource allocation
-                 Pal::gpusize svmPtr
-                 );
+                 Pal::gpusize svmPtr);
 
   uint elementSize_;  //!< Size of a single element in bytes
 
@@ -433,11 +431,11 @@ class Resource : public amd::HeapObject {
    */
   void* mapLayers(VirtualGPU* gpu,  //!< Virtual GPU device object
                   uint flags = 0    //!< flags for the map operation
-                  );
+  );
 
   //! Unlocks the resource with layers if it was locked
   void unmapLayers(VirtualGPU* gpu  //!< Virtual GPU device object
-                   );
+  );
 
   //! Calls PAL to map a resource
   void* gpuMemoryMap(size_t* pitch,             //!< Pitch value for the image
@@ -454,7 +452,7 @@ class Resource : public amd::HeapObject {
 
   //! Converts Resource memory type to the PAL heaps
   void memTypeToHeap(Pal::GpuMemoryCreateInfo* createInfo  //!< Memory create info
-                     );
+  );
 
   const Device& gpuDevice_;     //!< GPU device
   Descriptor desc_;             //!< Descriptor for this resource
@@ -462,7 +460,7 @@ class Resource : public amd::HeapObject {
   void* address_;               //!< Physical address of this resource
   size_t offset_;               //!< Resource offset
   GpuMemoryReference* memRef_;  //!< PAL resource reference
-  Pal::gpusize  subOffset_;     //!< GPU memory offset in the oririnal resource
+  Pal::gpusize subOffset_;      //!< GPU memory offset in the original resource
   const Resource* viewOwner_;   //!< GPU resource, which owns this view
   void* glInteropMbRes_;        //!< Mb Res handle
   uint32_t glType_;             //!< GL interop type
@@ -485,41 +483,35 @@ class Resource : public amd::HeapObject {
 typedef Util::BuddyAllocator<Device> MemBuddyAllocator;
 
 class MemorySubAllocator : public amd::HeapObject {
-public:
+ public:
   MemorySubAllocator(Device* device) : device_(device) {}
 
   ~MemorySubAllocator();
 
   //! Create suballocation
-  GpuMemoryReference* Allocate(Pal::gpusize size,
-                               Pal::gpusize alignment,
-                               const Pal::IGpuMemory* reserved_va,
-                               Pal::gpusize* offset
-                               );
+  GpuMemoryReference* Allocate(Pal::gpusize size, Pal::gpusize alignment,
+                               const Pal::IGpuMemory* reserved_va, Pal::gpusize* offset);
   //! Free suballocation
-  bool Free(amd::Monitor* monitor,
-            GpuMemoryReference* mem_ref,
-            Pal::gpusize offset
-            );
+  bool Free(amd::Monitor* monitor, GpuMemoryReference* mem_ref, Pal::gpusize offset);
 
-protected:
+ protected:
   //! Allocate new chunk of memory
   virtual bool CreateChunk(const Pal::IGpuMemory* reserved_va);
   bool InitAllocator(GpuMemoryReference* mem_ref);
 
   Device* device_;
-  std::unordered_map<GpuMemoryReference*, MemBuddyAllocator*>  heaps_;
+  std::unordered_map<GpuMemoryReference*, MemBuddyAllocator*> heaps_;
 };
 
 class CoarseMemorySubAllocator : public MemorySubAllocator {
-public:
+ public:
   CoarseMemorySubAllocator(Device* device) : MemorySubAllocator(device) {}
 
   bool CreateChunk(const Pal::IGpuMemory* reservedVa) override;
 };
 
 class FineMemorySubAllocator : public MemorySubAllocator {
-public:
+ public:
   FineMemorySubAllocator(Device* device) : MemorySubAllocator(device) {}
 
   bool CreateChunk(const Pal::IGpuMemory* reserved_va) override;
@@ -529,29 +521,28 @@ class ResourceCache : public amd::HeapObject {
  public:
   //! Default constructor
   ResourceCache(Device* device, size_t cacheSizeLimit)
-      : lockCacheOps_("PAL resource cache", true)
-      , cacheSize_(0)
-      , lclCacheSize_(0)
-      , cacheSizeLimit_(cacheSizeLimit)
-      , mem_sub_alloc_local_(device)
-      , mem_sub_alloc_coarse_ (device)
-      , mem_sub_alloc_fine_ (device) {}
+      : lockCacheOps_("PAL resource cache", true),
+        cacheSize_(0),
+        lclCacheSize_(0),
+        cacheSizeLimit_(cacheSizeLimit),
+        mem_sub_alloc_local_(device),
+        mem_sub_alloc_coarse_(device),
+        mem_sub_alloc_fine_(device) {}
 
   //! Default destructor
   ~ResourceCache();
 
   //! Adds a PAL resource to the cache
-  bool addGpuMemory(Resource::Descriptor* desc,   //!< Resource descriptor - cache key
-                    GpuMemoryReference*   ref,    //!< Resource reference
-                    Pal::gpusize          offset  //!< Original resource offset
-                    );
+  bool addGpuMemory(Resource::Descriptor* desc,  //!< Resource descriptor - cache key
+                    GpuMemoryReference* ref,     //!< Resource reference
+                    Pal::gpusize offset          //!< Original resource offset
+  );
 
   //! Finds a PAL resource from the cache
   GpuMemoryReference* findGpuMemory(
       Resource::Descriptor* desc,  //!< Resource descriptor - cache key
-      Pal::gpusize size,
-      Pal::gpusize alignment,
-      const Pal::IGpuMemory* reserved_va, //!< Reserved VA for SVM suballocations
+      Pal::gpusize size, Pal::gpusize alignment,
+      const Pal::IGpuMemory* reserved_va,  //!< Reserved VA for SVM suballocations
       Pal::gpusize* offset);
 
   //! Destroys cache
@@ -576,16 +567,17 @@ class ResourceCache : public amd::HeapObject {
 
   amd::Monitor lockCacheOps_;  //!< Lock to serialise cache access
 
-  size_t cacheSize_;            //!< Current cache size in bytes
-  size_t lclCacheSize_;         //!< Local memory stored in the cache
-  const size_t cacheSizeLimit_; //!< Cache size limit in bytes
+  size_t cacheSize_;             //!< Current cache size in bytes
+  size_t lclCacheSize_;          //!< Local memory stored in the cache
+  const size_t cacheSizeLimit_;  //!< Cache size limit in bytes
 
   //! PAL resource cache
   std::list<std::pair<Resource::Descriptor*, GpuMemoryReference*> > resCache_;
 
-  MemorySubAllocator  mem_sub_alloc_local_;  //!< Allocator for suballocations in Local
-  CoarseMemorySubAllocator mem_sub_alloc_coarse_; //!< Allocator for suballocations in Coarse SVM
-  FineMemorySubAllocator mem_sub_alloc_fine_; //!< Allocator for suballocations in Fine SVM
+  MemorySubAllocator mem_sub_alloc_local_;         //!< Allocator for suballocations in Local
+  CoarseMemorySubAllocator mem_sub_alloc_coarse_;  //!< Allocator for suballocations in Coarse SVM
+  FineMemorySubAllocator mem_sub_alloc_fine_;      //!< Allocator for suballocations in Fine SVM
 };
 
-/*@}*/} // namespace pal
+/*@}*/
+}  // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
index c663831670..131bb4afed 100644
--- a/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palsettings.cpp
@@ -136,7 +136,7 @@ Settings::Settings() {
   subAllocationMinSize_ = 4 * Ki;
   subAllocationChunkSize_ = 64 * Mi;
   subAllocationMaxSize_ =
-    std::min(static_cast<uint64_t>(GPU_MAX_SUBALLOC_SIZE) * Ki, subAllocationChunkSize_);
+      std::min(static_cast<uint64_t>(GPU_MAX_SUBALLOC_SIZE) * Ki, subAllocationChunkSize_);
 
   maxCmdBuffers_ = 12;
   useLightning_ = GPU_ENABLE_LC;
@@ -148,8 +148,7 @@ Settings::Settings() {
 
 bool Settings::create(const Pal::DeviceProperties& palProp,
                       const Pal::GpuMemoryHeapProperties* heaps, const Pal::WorkStationCaps& wscaps,
-                      bool reportAsOCL12Device)
-{
+                      bool reportAsOCL12Device) {
   uint32_t osVer = 0x0;
 
   // Disable thread trace by default for all devices
@@ -198,8 +197,9 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
     case Pal::AsicRevision::Navi10Lite:
       gfx10Plus_ = true;
       useLightning_ = (!flagIsDefault(GPU_ENABLE_LC)) ? GPU_ENABLE_LC : true;
-      hsailExplicitXnack_ = static_cast<uint>(palProp.gpuMemoryProperties.flags.pageMigrationEnabled
-        || palProp.gpuMemoryProperties.flags.iommuv2Support);
+      hsailExplicitXnack_ =
+          static_cast<uint>(palProp.gpuMemoryProperties.flags.pageMigrationEnabled ||
+                            palProp.gpuMemoryProperties.flags.iommuv2Support);
       enableWgpMode_ = GPU_ENABLE_WGP_MODE;
       if (useLightning_) {
         enableWave32Mode_ = true;
@@ -346,7 +346,7 @@ bool Settings::create(const Pal::DeviceProperties& palProp,
   if (VerifyVersionInfo(&versionInfo, VER_MAJORVERSION | VER_MINORVERSION, conditionMask)) {
     splitSizeForWin7_ = true;  // Update flag of DMA flush split size for Win 7
     if (modifyMaxWorkload.time > 0) {
-      maxWorkloadTime_ = modifyMaxWorkload.time; // Update max workload time
+      maxWorkloadTime_ = modifyMaxWorkload.time;  // Update max workload time
     }
   }
 #endif  // defined(_WIN32)
diff --git a/projects/clr/rocclr/runtime/device/pal/palsettings.hpp b/projects/clr/rocclr/runtime/device/pal/palsettings.hpp
index b6e1d95441..6b8ee86768 100644
--- a/projects/clr/rocclr/runtime/device/pal/palsettings.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palsettings.hpp
@@ -39,63 +39,63 @@ class Settings : public device::Settings {
 
   union {
     struct {
-      uint remoteAlloc_ : 1;             //!< Allocate remote memory for the heap
-      uint stagedXferRead_ : 1;          //!< Uses a staged buffer read
-      uint stagedXferWrite_ : 1;         //!< Uses a staged buffer write
-      uint disablePersistent_ : 1;       //!< Disables using persistent memory for staging
-      uint imageSupport_ : 1;            //!< Report images support
-      uint doublePrecision_ : 1;         //!< Enables double precision support
-      uint use64BitPtr_ : 1;             //!< Use 64bit pointers on GPU
-      uint force32BitOcl20_ : 1;         //!< Force 32bit apps to take CLANG/HSAIL path on GPU
-      uint imageDMA_ : 1;                //!< Enable direct image DMA transfers
-      uint viPlus_ : 1;                  //!< VI and post VI features
-      uint aiPlus_ : 1;                  //!< AI and post AI features
-      uint gfx10Plus_ : 1;               //!< gfx10 and post gfx10 features
-      uint threadTraceEnable_ : 1;       //!< Thread trace enable
-      uint linearPersistentImage_ : 1;   //!< Allocates linear images in persistent
-      uint useSingleScratch_ : 1;        //!< Allocates single scratch per device
-      uint svmAtomics_ : 1;              //!< SVM device atomics
-      uint svmFineGrainSystem_ : 1;      //!< SVM fine grain system support
-      uint useDeviceQueue_ : 1;          //!< Submit to separate device queue
-      uint sdamPageFaultWar_ : 1;        //!< SDMA page fault workaround
-      uint rgpSqttWaitIdle_: 1;          //!< Wait for idle after SQTT trace
-      uint rgpSqttForceDisable_: 1;      //!< Disables SQTT
-      uint splitSizeForWin7_: 1;         //!< DMA flush split size for Win 7
+      uint remoteAlloc_ : 1;            //!< Allocate remote memory for the heap
+      uint stagedXferRead_ : 1;         //!< Uses a staged buffer read
+      uint stagedXferWrite_ : 1;        //!< Uses a staged buffer write
+      uint disablePersistent_ : 1;      //!< Disables using persistent memory for staging
+      uint imageSupport_ : 1;           //!< Report images support
+      uint doublePrecision_ : 1;        //!< Enables double precision support
+      uint use64BitPtr_ : 1;            //!< Use 64bit pointers on GPU
+      uint force32BitOcl20_ : 1;        //!< Force 32bit apps to take CLANG/HSAIL path on GPU
+      uint imageDMA_ : 1;               //!< Enable direct image DMA transfers
+      uint viPlus_ : 1;                 //!< VI and post VI features
+      uint aiPlus_ : 1;                 //!< AI and post AI features
+      uint gfx10Plus_ : 1;              //!< gfx10 and post gfx10 features
+      uint threadTraceEnable_ : 1;      //!< Thread trace enable
+      uint linearPersistentImage_ : 1;  //!< Allocates linear images in persistent
+      uint useSingleScratch_ : 1;       //!< Allocates single scratch per device
+      uint svmAtomics_ : 1;             //!< SVM device atomics
+      uint svmFineGrainSystem_ : 1;     //!< SVM fine grain system support
+      uint useDeviceQueue_ : 1;         //!< Submit to separate device queue
+      uint sdamPageFaultWar_ : 1;       //!< SDMA page fault workaround
+      uint rgpSqttWaitIdle_ : 1;        //!< Wait for idle after SQTT trace
+      uint rgpSqttForceDisable_ : 1;    //!< Disables SQTT
+      uint splitSizeForWin7_ : 1;       //!< DMA flush split size for Win 7
       uint reserved_ : 11;
     };
     uint value_;
   };
 
-  uint oclVersion_;            //!< Reported OpenCL version support
-  uint debugFlags_;            //!< Debug GPU flags
-  uint hwLDSSize_;             //!< HW local data store size
-  uint maxWorkGroupSize_;      //!< Requested workgroup size for this device
-  uint preferredWorkGroupSize_;//!< Requested preferred workgroup size for this device
-  uint workloadSplitSize_;     //!< Workload split size
-  uint minWorkloadTime_;       //!< Minimal workload time in 0.1 ms
-  uint maxWorkloadTime_;       //!< Maximum workload time in 0.1 ms
-  uint blitEngine_;            //!< Blit engine type
-  uint cacheLineSize_;         //!< Cache line size in bytes
-  uint cacheSize_;             //!< L1 cache size in bytes
-  uint numComputeRings_;       //!< 0 - disabled, 1 , 2,.. - the number of compute rings
-  uint numDeviceEvents_;       //!< The number of device events
-  uint numWaitEvents_;         //!< The number of wait events for device enqueue
-  uint hostMemDirectAccess_;   //!< Enables direct access to the host memory
-  uint numScratchWavesPerCu_;  //!< Maximum number of waves when scratch is enabled
-  size_t xferBufSize_;         //!< Transfer buffer size for image copy optimization
-  size_t stagedXferSize_;      //!< Staged buffer size
-  size_t pinnedXferSize_;      //!< Pinned buffer size for transfer
-  size_t pinnedMinXferSize_;   //!< Minimal buffer size for pinned transfer
-  size_t resourceCacheSize_;   //!< Resource cache size in MB
-  size_t numMemDependencies_;  //!< The array size for memory dependencies tracking
-  uint64_t maxAllocSize_;      //!< Maximum single allocation size
-  uint rgpSqttDispCount_;      //!< The number of dispatches captured in SQTT
-  uint maxCmdBuffers_;         //!< Maximum number of command buffers allocated per queue
+  uint oclVersion_;              //!< Reported OpenCL version support
+  uint debugFlags_;              //!< Debug GPU flags
+  uint hwLDSSize_;               //!< HW local data store size
+  uint maxWorkGroupSize_;        //!< Requested workgroup size for this device
+  uint preferredWorkGroupSize_;  //!< Requested preferred workgroup size for this device
+  uint workloadSplitSize_;       //!< Workload split size
+  uint minWorkloadTime_;         //!< Minimal workload time in 0.1 ms
+  uint maxWorkloadTime_;         //!< Maximum workload time in 0.1 ms
+  uint blitEngine_;              //!< Blit engine type
+  uint cacheLineSize_;           //!< Cache line size in bytes
+  uint cacheSize_;               //!< L1 cache size in bytes
+  uint numComputeRings_;         //!< 0 - disabled, 1, 2, ... - the number of compute rings
+  uint numDeviceEvents_;         //!< The number of device events
+  uint numWaitEvents_;           //!< The number of wait events for device enqueue
+  uint hostMemDirectAccess_;     //!< Enables direct access to the host memory
+  uint numScratchWavesPerCu_;    //!< Maximum number of waves when scratch is enabled
+  size_t xferBufSize_;           //!< Transfer buffer size for image copy optimization
+  size_t stagedXferSize_;        //!< Staged buffer size
+  size_t pinnedXferSize_;        //!< Pinned buffer size for transfer
+  size_t pinnedMinXferSize_;     //!< Minimal buffer size for pinned transfer
+  size_t resourceCacheSize_;     //!< Resource cache size in MB
+  size_t numMemDependencies_;    //!< The array size for memory dependencies tracking
+  uint64_t maxAllocSize_;        //!< Maximum single allocation size
+  uint rgpSqttDispCount_;        //!< The number of dispatches captured in SQTT
+  uint maxCmdBuffers_;           //!< Maximum number of command buffers allocated per queue
+
+  uint64_t subAllocationMinSize_;    //!< Minimum size allowed for suballocations
+  uint64_t subAllocationMaxSize_;    //!< Maximum size allowed with suballocations
+  uint64_t subAllocationChunkSize_;  //!< Chunk size for suballocations
 
-  uint64_t subAllocationMinSize_;   //!< Minimum size allowed for suballocations
-  uint64_t subAllocationMaxSize_;   //!< Maximum size allowed with suballocations
-  uint64_t subAllocationChunkSize_; //!< Chunk size for suballocaitons
-  
   amd::LibrarySelector libSelector_;  //!< Select linking libraries for compiler
 
   //! Default constructor
@@ -106,7 +106,7 @@ class Settings : public device::Settings {
               const Pal::GpuMemoryHeapProperties* heaps,  //!< PAL heap settings
               const Pal::WorkStationCaps& wscaps,         //!< PAL  workstation settings
               bool reportAsOCL12Device = false            //!< Report As OpenCL1.2 Device
-              );
+  );
 
  private:
   //! Disable copy constructor
@@ -119,4 +119,5 @@ class Settings : public device::Settings {
   void override();
 };
 
-/*@}*/} // namespace pal
+/*@}*/
+}  // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/paltimestamp.hpp b/projects/clr/rocclr/runtime/device/pal/paltimestamp.hpp
index 97b2d5e5ca..9691fa71a2 100644
--- a/projects/clr/rocclr/runtime/device/pal/paltimestamp.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/paltimestamp.hpp
@@ -40,7 +40,7 @@ class TimeStamp : public amd::HeapObject {
             Pal::IGpuMemory* iMem,  //!< Buffer with the timer values
             uint memOffset,         //!< Offset in the buffer for the current TS
             address cpuAddr         //!< CPU pointer for the values in memory
-            );
+  );
 
   //! Default destructor
   ~TimeStamp();
@@ -114,4 +114,5 @@ class TimeStampCache : public amd::HeapObject {
   uint tsOffset_;                    //!< Active offset in the current mem object
 };
 
-/*@}*/} // namespace pal
+/*@}*/
+}  // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
index 82e7372933..ff8bffefae 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
@@ -70,8 +70,7 @@ VirtualGPU::Queue* VirtualGPU::Queue::Create(const VirtualGPU& gpu, Pal::QueueTy
   if (qCreateInfo.engineType == Pal::EngineTypeExclusiveCompute) {
     if (it != gpu.dev().exclusiveComputeEnginesId().end()) {
       qCreateInfo.engineIndex = it->second;
-    }
-    else {
+    } else {
       return nullptr;
     }
   }
@@ -97,8 +96,8 @@ VirtualGPU::Queue* VirtualGPU::Queue::Create(const VirtualGPU& gpu, Pal::QueueTy
   }
 
   size_t allocSize = qSize + max_command_buffers * (cmdSize + fSize);
-  VirtualGPU::Queue* queue = new (allocSize) VirtualGPU::Queue(gpu, palDev,
-    residency_limit, max_command_buffers);
+  VirtualGPU::Queue* queue =
+      new (allocSize) VirtualGPU::Queue(gpu, palDev, residency_limit, max_command_buffers);
   if (queue != nullptr) {
     address addrQ = reinterpret_cast<address>(&queue[1]);
     // Create PAL queue object
@@ -163,16 +162,16 @@ VirtualGPU::Queue::~Queue() {
   }
 }
 
-Pal::Result VirtualGPU::Queue::UpdateAppPowerProfile()
-{
-    std::wstring wsAppPathAndFileName = Device::appProfile()->wsAppPathAndFileName();
+Pal::Result VirtualGPU::Queue::UpdateAppPowerProfile() {
+  std::wstring wsAppPathAndFileName = Device::appProfile()->wsAppPathAndFileName();
 
-    const wchar_t* wAppPathAndName = wsAppPathAndFileName.c_str();
-    // Find the last occurance of the '\\' character and extract the name of the application as wide char.
-    const wchar_t* wAppNamePtr = wcsrchr(wAppPathAndName, '\\');
-    const wchar_t* wAppName = wAppNamePtr ? wAppNamePtr + 1 : wAppPathAndName;
+  const wchar_t* wAppPathAndName = wsAppPathAndFileName.c_str();
+  // Find the last occurrence of the '\\' character and extract the application name as a wide
+  // char string.
+  const wchar_t* wAppNamePtr = wcsrchr(wAppPathAndName, '\\');
+  const wchar_t* wAppName = wAppNamePtr ? wAppNamePtr + 1 : wAppPathAndName;
 
-    return iQueue_->UpdateAppPowerProfile(wAppName, wAppPathAndName);
+  return iQueue_->UpdateAppPowerProfile(wAppName, wAppPathAndName);
 }
 
 void VirtualGPU::Queue::addCmdMemRef(GpuMemoryReference* mem) {
@@ -188,8 +187,7 @@ void VirtualGPU::Queue::addCmdMemRef(GpuMemoryReference* mem) {
     memRef.pGpuMemory = iMem;
     palMemRefs_.push_back(memRef);
     // Check SDI memory object
-    if (iMem->Desc().flags.isExternPhys &&
-        (sdiReferences_.find(iMem) == sdiReferences_.end())) {
+    if (iMem->Desc().flags.isExternPhys && (sdiReferences_.find(iMem) == sdiReferences_.end())) {
       sdiReferences_.insert(iMem);
       palSdiRefs_.push_back(iMem);
     }
@@ -268,8 +266,7 @@ bool VirtualGPU::Queue::flush() {
   // Submit command buffer to OS
   Pal::Result result;
   if (gpu_.rgpCaptureEna()) {
-    result = gpu_.dev().rgpCaptureMgr()->TimedQueueSubmit(
-      iQueue_, cmdBufIdCurrent_, submitInfo);
+    result = gpu_.dev().rgpCaptureMgr()->TimedQueueSubmit(iQueue_, cmdBufIdCurrent_, submitInfo);
   } else {
     result = iQueue_->Submit(submitInfo);
   }
@@ -383,28 +380,28 @@ void VirtualGPU::Queue::DumpMemoryReferences() const {
   if (dump.is_open()) {
     dump << start << " Queue: ";
     switch (iQueue_->Type()) {
-    case Pal::QueueTypeCompute:
-      dump << "Compute";
-      break;
-    case Pal::QueueTypeDma:
-      dump << "SDMA";
-      break;
-    default:
-      dump << "unknown";
-      break;
+      case Pal::QueueTypeCompute:
+        dump << "Compute";
+        break;
+      case Pal::QueueTypeDma:
+        dump << "SDMA";
+        break;
+      default:
+        dump << "unknown";
+        break;
     }
     dump << "\n"
-        << "Resident memory resources:\n";
+         << "Resident memory resources:\n";
     uint idx = 0;
     for (auto it : memReferences_) {
       dump << " " << idx << "\t[";
       dump.setf(std::ios::hex, std::ios::basefield);
       dump.setf(std::ios::showbase);
       dump << (it.first)->iMem()->Desc().gpuVirtAddr << ", "
-          << (it.first)->iMem()->Desc().gpuVirtAddr + (it.first)->iMem()->Desc().size;
+           << (it.first)->iMem()->Desc().gpuVirtAddr + (it.first)->iMem()->Desc().size;
       dump.setf(std::ios::dec);
-      dump << "] CbId:" << it.second <<
-          ", Heap: " << (it.first)->iMem()->Desc().preferredHeap << "\n";
+      dump << "] CbId:" << it.second << ", Heap: " << (it.first)->iMem()->Desc().preferredHeap
+           << "\n";
       idx++;
     }
 
@@ -414,8 +411,7 @@ void VirtualGPU::Queue::DumpMemoryReferences() const {
       for (size_t i = 0; i < signature.numParameters(); ++i) {
         const amd::KernelParameterDescriptor& desc = signature.at(i);
         // Find if the current argument is a memory object
-        if ((desc.type_ == T_POINTER) &&
-            (desc.addressQualifier_ != CL_KERNEL_ARG_ADDRESS_LOCAL)) {
+        if ((desc.type_ == T_POINTER) && (desc.addressQualifier_ != CL_KERNEL_ARG_ADDRESS_LOCAL)) {
           dump << " " << desc.name_ << ": " << std::endl;
         }
       }
@@ -519,7 +515,7 @@ void VirtualGPU::MemoryDependency::clear(bool all) {
       // note: The array growth shouldn't occur under the normal conditions,
       // but in a case when SVM path sends the amount of SVM ptrs over
       // the max size of kernel arguments
-      MemoryState* ptr  = new MemoryState[maxMemObjectsInQueue_ << 1];
+      MemoryState* ptr = new MemoryState[maxMemObjectsInQueue_ << 1];
       if (nullptr == ptr) {
         numMemObjectsInQueue_ = 0;
         return;
@@ -527,7 +523,7 @@ void VirtualGPU::MemoryDependency::clear(bool all) {
       maxMemObjectsInQueue_ <<= 1;
       memcpy(ptr, memObjectsInQueue_, sizeof(MemoryState) * numMemObjectsInQueue_);
       delete[] memObjectsInQueue_;
-      memObjectsInQueue_= ptr;
+      memObjectsInQueue_ = ptr;
     }
 
     // Adjust the number of active objects
@@ -748,7 +744,6 @@ VirtualGPU::VirtualGPU(Device& device)
       maskGroups_(1),
       hsaQueueMem_(nullptr),
       cmdAllocator_(nullptr) {
-
   // Note: Virtual GPU device creation must be a thread safe operation
   index_ = gpuDevice_.numOfVgpus_++;
   gpuDevice_.vgpus_.resize(gpuDevice_.numOfVgpus());
@@ -780,8 +775,8 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs,
   createInfo.flags.autoMemoryReuse = false;
   createInfo.allocInfo[Pal::CommandDataAlloc].allocHeap = Pal::GpuHeapGartUswc;
   createInfo.allocInfo[Pal::CommandDataAlloc].allocSize =
-  createInfo.allocInfo[Pal::CommandDataAlloc].suballocSize =
-    VirtualGPU::Queue::MaxCommands * (320 + ((profiling) ? 96 : 0));
+      createInfo.allocInfo[Pal::CommandDataAlloc].suballocSize =
+          VirtualGPU::Queue::MaxCommands * (320 + ((profiling) ? 96 : 0));
 
   createInfo.allocInfo[Pal::EmbeddedDataAlloc].allocHeap = Pal::GpuHeapGartUswc;
   createInfo.allocInfo[Pal::EmbeddedDataAlloc].allocSize = 64 * Ki;
@@ -803,8 +798,9 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs,
 
   const uint firstQueue = (dev().numComputeEngines() > 2) ? 1 : 0;
   uint idx = index() % (dev().numComputeEngines() - firstQueue);
-  uint64_t residency_limit = dev().properties().gpuMemoryProperties.flags.supportPerSubmitMemRefs ? 0 :
-    (dev().properties().gpuMemoryProperties.maxLocalMemSize >> 2);
+  uint64_t residency_limit = dev().properties().gpuMemoryProperties.flags.supportPerSubmitMemRefs
+      ? 0
+      : (dev().properties().gpuMemoryProperties.maxLocalMemSize >> 2);
   uint max_cmd_buffers = dev().settings().maxCmdBuffers_;
 
   if (dev().numComputeEngines()) {
@@ -815,9 +811,9 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs,
     // hwRing_ should be set 0 if forced to have single scratch buffer
     hwRing_ = (dev().settings().useSingleScratch_) ? 0 : idx;
 
-    queues_[MainEngine] = Queue::Create(*this, Pal::QueueTypeCompute, idx + firstQueue,
-                                        cmdAllocator_, rtCUs, priority,
-                                        residency_limit, max_cmd_buffers);
+    queues_[MainEngine] =
+        Queue::Create(*this, Pal::QueueTypeCompute, idx + firstQueue, cmdAllocator_, rtCUs,
+                      priority, residency_limit, max_cmd_buffers);
     if (nullptr == queues_[MainEngine]) {
       return false;
     }
@@ -832,20 +828,19 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs,
         sdma = 1;
       }
 
-      queues_[SdmaEngine] =
-          Queue::Create(*this, Pal::QueueTypeDma, sdma, cmdAllocator_,
-                        amd::CommandQueue::RealTimeDisabled, amd::CommandQueue::Priority::Normal,
-                        residency_limit, max_cmd_buffers);
+      queues_[SdmaEngine] = Queue::Create(
+          *this, Pal::QueueTypeDma, sdma, cmdAllocator_, amd::CommandQueue::RealTimeDisabled,
+          amd::CommandQueue::Priority::Normal, residency_limit, max_cmd_buffers);
       if (nullptr == queues_[SdmaEngine]) {
         return false;
       }
     } else {
-        queues_[SdmaEngine] = Queue::Create(*this, Pal::QueueTypeCompute,
-            idx, cmdAllocator_, rtCUs, amd::CommandQueue::Priority::Normal,
-            residency_limit, max_cmd_buffers);
-        if (nullptr == queues_[SdmaEngine]) {
-            return false;
-        }
+      queues_[SdmaEngine] =
+          Queue::Create(*this, Pal::QueueTypeCompute, idx, cmdAllocator_, rtCUs,
+                        amd::CommandQueue::Priority::Normal, residency_limit, max_cmd_buffers);
+      if (nullptr == queues_[SdmaEngine]) {
+        return false;
+      }
     }
   } else {
     Unimplemented();
@@ -921,7 +916,8 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs,
     bool dbg_vmid = false;
     state_.rgpCaptureEnabled_ = true;
     dev().rgpCaptureMgr()->RegisterTimedQueue(2 * index(), queue(MainEngine).iQueue_, &dbg_vmid);
-    dev().rgpCaptureMgr()->RegisterTimedQueue(2 * index() + 1, queue(SdmaEngine).iQueue_, &dbg_vmid);
+    dev().rgpCaptureMgr()->RegisterTimedQueue(2 * index() + 1, queue(SdmaEngine).iQueue_,
+                                              &dbg_vmid);
   }
 
   return true;
@@ -1511,99 +1507,99 @@ void VirtualGPU::submitMapMemory(amd::MapMemoryCommand& vcmd) {
 void VirtualGPU::submitUnmapMemory(amd::UnmapMemoryCommand& vcmd) {
   bool unmapMip = false;
   amd::Image* amdImage;
-{
-  // Make sure VirtualGPU has an exclusive access to the resources
-  amd::ScopedLock lock(execution());
+  {
+    // Make sure VirtualGPU has an exclusive access to the resources
+    amd::ScopedLock lock(execution());
 
-  pal::Memory* memory = dev().getGpuMemory(&vcmd.memory());
-  amd::Memory* owner = memory->owner();
-  const device::Memory::WriteMapInfo* writeMapInfo = memory->writeMapInfo(vcmd.mapPtr());
-  if (nullptr == writeMapInfo) {
-    LogError("Unmap without map call");
-    return;
-  }
-  profilingBegin(vcmd, true);
-
-  // Check if image is a mipmap and assign a saved view
-  amdImage = owner->asImage();
-  if ((amdImage != nullptr) && (amdImage->getMipLevels() > 1) &&
-      (writeMapInfo->baseMip_ != nullptr)) {
-    // Assign mip level view
-    amdImage = writeMapInfo->baseMip_;
-    // Clear unmap flags from the parent image
-    memory->clearUnmapInfo(vcmd.mapPtr());
-    memory = dev().getGpuMemory(amdImage);
-    unmapMip = true;
-    writeMapInfo = memory->writeMapInfo(vcmd.mapPtr());
-  }
-
-  // We used host memory
-  if ((owner->getHostMem() != nullptr) && memory->isDirectMap()) {
-    if (writeMapInfo->isUnmapWrite()) {
-      // Target is the backing store, so sync
-      owner->signalWrite(nullptr);
-      memory->syncCacheFromHost(*this);
+    pal::Memory* memory = dev().getGpuMemory(&vcmd.memory());
+    amd::Memory* owner = memory->owner();
+    const device::Memory::WriteMapInfo* writeMapInfo = memory->writeMapInfo(vcmd.mapPtr());
+    if (nullptr == writeMapInfo) {
+      LogError("Unmap without map call");
+      return;
     }
-    // Remove memory from VA cache
-    dev().removeVACache(memory);
-  }
-  // data check was added for persistent memory that failed to get aperture
-  // and therefore are treated like a remote resource
-  else if (memory->isPersistentDirectMap() && (memory->data() != nullptr)) {
-    memory->unmap(this);
-  } else if (memory->mapMemory() != nullptr) {
-    if (writeMapInfo->isUnmapWrite()) {
-      amd::Coord3D srcOrigin(0, 0, 0);
-      // Target is a remote resource, so copy
-      assert(memory->mapMemory() != nullptr);
-      if (memory->desc().buffer_) {
-        if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, writeMapInfo->origin_,
-                                  writeMapInfo->origin_, writeMapInfo->region_,
-                                  writeMapInfo->isEntire())) {
-          LogError("submitUnmapMemory() - copy failed");
-          vcmd.setStatus(CL_OUT_OF_RESOURCES);
-        }
-      } else if ((vcmd.memory().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
-        Memory* memoryBuf = memory;
-        amd::Coord3D origin(writeMapInfo->origin_[0]);
-        amd::Coord3D size(writeMapInfo->region_[0]);
-        size_t elemSize = vcmd.memory().asImage()->getImageFormat().getElementSize();
-        origin.c[0] *= elemSize;
-        size.c[0] *= elemSize;
+    profilingBegin(vcmd, true);
 
-        amd::Memory* bufferFromImage = createBufferFromImage(vcmd.memory());
-        if (nullptr == bufferFromImage) {
-          LogError("We should not fail buffer creation from image_buffer!");
+    // Check if image is a mipmap and assign a saved view
+    amdImage = owner->asImage();
+    if ((amdImage != nullptr) && (amdImage->getMipLevels() > 1) &&
+        (writeMapInfo->baseMip_ != nullptr)) {
+      // Assign mip level view
+      amdImage = writeMapInfo->baseMip_;
+      // Clear unmap flags from the parent image
+      memory->clearUnmapInfo(vcmd.mapPtr());
+      memory = dev().getGpuMemory(amdImage);
+      unmapMip = true;
+      writeMapInfo = memory->writeMapInfo(vcmd.mapPtr());
+    }
+
+    // We used host memory
+    if ((owner->getHostMem() != nullptr) && memory->isDirectMap()) {
+      if (writeMapInfo->isUnmapWrite()) {
+        // Target is the backing store, so sync
+        owner->signalWrite(nullptr);
+        memory->syncCacheFromHost(*this);
+      }
+      // Remove memory from VA cache
+      dev().removeVACache(memory);
+    }
+    // data check was added for persistent memory that failed to get aperture
+    // and therefore are treated like a remote resource
+    else if (memory->isPersistentDirectMap() && (memory->data() != nullptr)) {
+      memory->unmap(this);
+    } else if (memory->mapMemory() != nullptr) {
+      if (writeMapInfo->isUnmapWrite()) {
+        amd::Coord3D srcOrigin(0, 0, 0);
+        // Target is a remote resource, so copy
+        assert(memory->mapMemory() != nullptr);
+        if (memory->desc().buffer_) {
+          if (!blitMgr().copyBuffer(*memory->mapMemory(), *memory, writeMapInfo->origin_,
+                                    writeMapInfo->origin_, writeMapInfo->region_,
+                                    writeMapInfo->isEntire())) {
+            LogError("submitUnmapMemory() - copy failed");
+            vcmd.setStatus(CL_OUT_OF_RESOURCES);
+          }
+        } else if ((vcmd.memory().getType() == CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
+          Memory* memoryBuf = memory;
+          amd::Coord3D origin(writeMapInfo->origin_[0]);
+          amd::Coord3D size(writeMapInfo->region_[0]);
+          size_t elemSize = vcmd.memory().asImage()->getImageFormat().getElementSize();
+          origin.c[0] *= elemSize;
+          size.c[0] *= elemSize;
+
+          amd::Memory* bufferFromImage = createBufferFromImage(vcmd.memory());
+          if (nullptr == bufferFromImage) {
+            LogError("We should not fail buffer creation from image_buffer!");
+          } else {
+            memoryBuf = dev().getGpuMemory(bufferFromImage);
+          }
+          if (!blitMgr().copyBuffer(*memory->mapMemory(), *memoryBuf, srcOrigin, origin, size,
+                                    writeMapInfo->isEntire())) {
+            LogError("submitUnmapMemory() - copy failed");
+            vcmd.setStatus(CL_OUT_OF_RESOURCES);
+          }
+          if (nullptr != bufferFromImage) {
+            bufferFromImage->release();
+          }
         } else {
-          memoryBuf = dev().getGpuMemory(bufferFromImage);
-        }
-        if (!blitMgr().copyBuffer(*memory->mapMemory(), *memoryBuf, srcOrigin, origin, size,
-                                  writeMapInfo->isEntire())) {
-          LogError("submitUnmapMemory() - copy failed");
-          vcmd.setStatus(CL_OUT_OF_RESOURCES);
-        }
-        if (nullptr != bufferFromImage) {
-          bufferFromImage->release();
-        }
-      } else {
-        if (!blitMgr().copyBufferToImage(*memory->mapMemory(), *memory, srcOrigin,
-                                         writeMapInfo->origin_, writeMapInfo->region_,
-                                         writeMapInfo->isEntire())) {
-          LogError("submitUnmapMemory() - copy failed");
-          vcmd.setStatus(CL_OUT_OF_RESOURCES);
+          if (!blitMgr().copyBufferToImage(*memory->mapMemory(), *memory, srcOrigin,
+                                           writeMapInfo->origin_, writeMapInfo->region_,
+                                           writeMapInfo->isEntire())) {
+            LogError("submitUnmapMemory() - copy failed");
+            vcmd.setStatus(CL_OUT_OF_RESOURCES);
+          }
         }
       }
+    } else {
+      LogError("Unhandled unmap!");
+      vcmd.setStatus(CL_INVALID_VALUE);
     }
-  } else {
-    LogError("Unhandled unmap!");
-    vcmd.setStatus(CL_INVALID_VALUE);
+
+    // Clear unmap flags
+    memory->clearUnmapInfo(vcmd.mapPtr());
+
+    profilingEnd(vcmd);
   }
-
-  // Clear unmap flags
-  memory->clearUnmapInfo(vcmd.mapPtr());
-
-  profilingEnd(vcmd);
-}
   // Release a view for a mipmap map
   if (unmapMip) {
     // Memory release should be outside of the execution lock,
@@ -1700,9 +1696,9 @@ void VirtualGPU::submitCopyMemoryP2P(amd::CopyMemoryP2PCommand& cmd) {
   profilingBegin(cmd);
 
   Memory* srcDevMem = static_cast<pal::Memory*>(
-    cmd.source().getDeviceMemory(*cmd.source().getContext().devices()[0]));
+      cmd.source().getDeviceMemory(*cmd.source().getContext().devices()[0]));
   Memory* dstDevMem = static_cast<pal::Memory*>(
-    cmd.destination().getDeviceMemory(*cmd.destination().getContext().devices()[0]));
+      cmd.destination().getDeviceMemory(*cmd.destination().getContext().devices()[0]));
 
   bool p2pAllowed = false;
 #if 0
@@ -1728,16 +1724,15 @@ void VirtualGPU::submitCopyMemoryP2P(amd::CopyMemoryP2PCommand& cmd) {
       amd::Coord3D dstOrigin(cmd.dstOrigin()[0]);
 
       if (p2pAllowed) {
-        result = blitMgr().copyBuffer(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin,
-                                      size, cmd.isEntireMemory());
-      }
-      else {
+        result = blitMgr().copyBuffer(*srcDevMem, *dstDevMem, srcOrigin, dstOrigin, size,
+                                      cmd.isEntireMemory());
+      } else {
         amd::ScopedLock lock(dev().P2PStageOps());
         Memory* dstStgMem = static_cast<pal::Memory*>(
-          dev().P2PStage()->getDeviceMemory(*cmd.source().getContext().devices()[0]));
+            dev().P2PStage()->getDeviceMemory(*cmd.source().getContext().devices()[0]));
         Memory* srcStgMem = static_cast<pal::Memory*>(
-          dev().P2PStage()->getDeviceMemory(*cmd.destination().getContext().devices()[0]));
-          
+            dev().P2PStage()->getDeviceMemory(*cmd.destination().getContext().devices()[0]));
+
         size_t copy_size = Device::kP2PStagingSize;
         size_t left_size = size[0];
         amd::Coord3D stageOffset(0);
@@ -1750,11 +1745,11 @@ void VirtualGPU::submitCopyMemoryP2P(amd::CopyMemoryP2PCommand& cmd) {
           amd::Coord3D cpSize(copy_size);
 
           // Perform 2 step transfer with staging buffer
-          result &= dev().xferMgr().copyBuffer(
-            *srcDevMem, *dstStgMem, srcOrigin, stageOffset, cpSize);
+          result &=
+              dev().xferMgr().copyBuffer(*srcDevMem, *dstStgMem, srcOrigin, stageOffset, cpSize);
           srcOrigin.c[0] += copy_size;
-          result &= dstDevMem->dev().xferMgr().copyBuffer(
-            *srcStgMem, *dstDevMem, stageOffset, dstOrigin, cpSize);
+          result &= dstDevMem->dev().xferMgr().copyBuffer(*srcStgMem, *dstDevMem, stageOffset,
+                                                          dstOrigin, cpSize);
           dstOrigin.c[0] += copy_size;
         } while (left_size > 0);
       }
@@ -1940,10 +1935,8 @@ void VirtualGPU::submitSvmFreeMemory(amd::SvmFreeMemoryCommand& vcmd) {
 }
 
 // ================================================================================================
-void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQueue)
-{
-  AmdAqlWrap* wraps =
-      (AmdAqlWrap*)(&((AmdVQueueHeader*)gpuDefQueue->virtualQueue_->data())[1]);
+void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQueue) {
+  AmdAqlWrap* wraps = (AmdAqlWrap*)(&((AmdVQueueHeader*)gpuDefQueue->virtualQueue_->data())[1]);
   uint p = 0;
   for (uint i = 0; i < gpuDefQueue->vqHeader_->aql_slot_num; ++i) {
     if (wraps[i].state != 0) {
@@ -1963,11 +1956,9 @@ void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQ
       print << "\twait_list: " << wraps[i].wait_list << "\n";
       print << "\twait_num: " << wraps[i].wait_num << "\n";
       uint offsEvents = wraps[i].wait_list - gpuDefQueue->virtualQueue_->vmAddress();
-      size_t* events =
-          reinterpret_cast<size_t*>(gpuDefQueue->virtualQueue_->data() + offsEvents);
+      size_t* events = reinterpret_cast<size_t*>(gpuDefQueue->virtualQueue_->data() + offsEvents);
       for (j = 0; j < wraps[i].wait_num; ++j) {
-        uint offs =
-            static_cast<uint64_t>(events[j]) - gpuDefQueue->virtualQueue_->vmAddress();
+        uint offs = static_cast<uint64_t>(events[j]) - gpuDefQueue->virtualQueue_->vmAddress();
         AmdEvent* eventD = (AmdEvent*)(gpuDefQueue->virtualQueue_->data() + offs);
         print << "Wait Event#: " << j << "\n";
         print << "\tState: " << eventD->state << "; Counter: " << eventD->counter << "\n";
@@ -1980,8 +1971,8 @@ void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQ
       print << wraps[i].aql.grid_size_z << "]\n";
 
       HSAILKernel* child = nullptr;
-      for (auto it = hsaKernel.prog().kernels().begin();
-        it != hsaKernel.prog().kernels().end(); ++it) {
+      for (auto it = hsaKernel.prog().kernels().begin(); it != hsaKernel.prog().kernels().end();
+           ++it) {
         if (wraps[i].aql.kernel_object == static_cast<HSAILKernel*>(it->second)->gpuAqlCode()) {
           child = static_cast<HSAILKernel*>(it->second);
         }
@@ -1995,7 +1986,7 @@ void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQ
       uint offsArg = kernarg_address - gpuDefQueue->virtualQueue_->vmAddress();
       address argum = gpuDefQueue->virtualQueue_->data() + offsArg;
       print << "Kernel: " << child->name() << "\n";
-      const amd::KernelSignature&  signature = child->signature();
+      const amd::KernelSignature& signature = child->signature();
 
       // Check if runtime has to setup hidden arguments
       for (const auto it : signature.parameters()) {
@@ -2033,7 +2024,7 @@ void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQ
           continue;
         }
         print << "\t" << it.name_ << ": ";
-        for (int s = it.size_- 1; s >= 0; --s) {
+        for (int s = it.size_ - 1; s >= 0; --s) {
           print.width(2);
           print.fill('0');
           print << static_cast<uint32_t>(argum[s]);
@@ -2047,26 +2038,20 @@ void VirtualGPU::PrintChildren(const HSAILKernel& hsaKernel, VirtualGPU* gpuDefQ
 }
 
 // ================================================================================================
-bool VirtualGPU::PreDeviceEnqueue(
-    const amd::Kernel& kernel,
-    const HSAILKernel& hsaKernel,
-    VirtualGPU** gpuDefQueue,
-    uint64_t* vmDefQueue)
-{
+bool VirtualGPU::PreDeviceEnqueue(const amd::Kernel& kernel, const HSAILKernel& hsaKernel,
+                                  VirtualGPU** gpuDefQueue, uint64_t* vmDefQueue) {
   amd::DeviceQueue* defQueue = kernel.program().context().defDeviceQueue(dev());
   if (nullptr == defQueue) {
     LogError("Default device queue wasn't allocated");
     return false;
-  }
-  else {
+  } else {
     if (dev().settings().useDeviceQueue_) {
       *gpuDefQueue = static_cast<VirtualGPU*>(defQueue->vDev());
       if ((*gpuDefQueue)->hwRing() == hwRing()) {
         LogError("Can't submit the child kernels to the same HW ring as the host queue!");
         return false;
       }
-    }
-    else {
+    } else {
       createVirtualQueue(defQueue->size());
       *gpuDefQueue = this;
     }
@@ -2086,15 +2071,10 @@ bool VirtualGPU::PreDeviceEnqueue(
 }
 
 // ================================================================================================
-void VirtualGPU::PostDeviceEnqueue(
-    const amd::Kernel& kernel,
-    const HSAILKernel& hsaKernel,
-    VirtualGPU* gpuDefQueue,
-    uint64_t vmDefQueue,
-    uint64_t vmParentWrap,
-    GpuEvent* gpuEvent)
-{
-  uint32_t id  = gpuEvent->id_;
+void VirtualGPU::PostDeviceEnqueue(const amd::Kernel& kernel, const HSAILKernel& hsaKernel,
+                                   VirtualGPU* gpuDefQueue, uint64_t vmDefQueue,
+                                   uint64_t vmParentWrap, GpuEvent* gpuEvent) {
+  uint32_t id = gpuEvent->id_;
   amd::DeviceQueue* defQueue = kernel.program().context().defDeviceQueue(dev());
 
   // Make sure exculsive access to the device queue
@@ -2110,16 +2090,16 @@ void VirtualGPU::PostDeviceEnqueue(
     // Add the termination handshake to the host queue
     eventBegin(MainEngine);
     iCmd()->CmdVirtualQueueHandshake(vmParentWrap + offsetof(AmdAqlWrap, state), AQL_WRAP_DONE,
-      vmParentWrap + offsetof(AmdAqlWrap, child_counter), 0,
-      dev().settings().useDeviceQueue_);
+                                     vmParentWrap + offsetof(AmdAqlWrap, child_counter), 0,
+                                     dev().settings().useDeviceQueue_);
     eventEnd(MainEngine, *gpuEvent);
   }
 
   // Get the global loop start before the scheduler
   Pal::gpusize loopStart = gpuDefQueue->iCmd()->CmdVirtualQueueDispatcherStart();
   static_cast<KernelBlitManager&>(gpuDefQueue->blitMgr())
-    .runScheduler(*gpuDefQueue->virtualQueue_, *gpuDefQueue->schedParams_, 0,
-      gpuDefQueue->vqHeader_->aql_slot_num / (DeviceQueueMaskSize * maskGroups_));
+      .runScheduler(*gpuDefQueue->virtualQueue_, *gpuDefQueue->schedParams_, 0,
+                    gpuDefQueue->vqHeader_->aql_slot_num / (DeviceQueueMaskSize * maskGroups_));
   const static bool FlushL2 = true;
   gpuDefQueue->addBarrier(RgpSqqtBarrierReason::PostDeviceEnqueue, FlushL2);
 
@@ -2127,8 +2107,7 @@ void VirtualGPU::PostDeviceEnqueue(
   //! @note DMA flush must not occur between patch and the scheduler
   Pal::gpusize patchStart = gpuDefQueue->iCmd()->CmdVirtualQueueDispatcherStart();
   // Program parameters for the scheduler
-  SchedulerParam* param = reinterpret_cast<SchedulerParam*>(
-    gpuDefQueue->schedParams_->data());
+  SchedulerParam* param = reinterpret_cast<SchedulerParam*>(gpuDefQueue->schedParams_->data());
   param->signal = 1;
   // Scale clock to 1024 to avoid 64 bit div in the scheduler
   param->eng_clk = (1000 * 1024) / dev().info().maxEngineClockFrequency_;
@@ -2147,8 +2126,7 @@ void VirtualGPU::PostDeviceEnqueue(
     param->numMaxWaves = 32 * dev().info().maxComputeUnits_;
     param->scratchOffset = dev().scratch(gpuDefQueue->hwRing())->offset_;
     addVmMemory(scratchBuf);
-  }
-  else {
+  } else {
     param->numMaxWaves = 0;
     param->scratchSize = 0;
     param->scratch = 0;
@@ -2162,8 +2140,8 @@ void VirtualGPU::PostDeviceEnqueue(
   Pal::gpusize signalAddr = gpuDefQueue->schedParams_->vmAddress();
   gpuDefQueue->eventBegin(MainEngine);
   gpuDefQueue->iCmd()->CmdVirtualQueueDispatcherEnd(
-    signalAddr, loopStart,
-    gpuDefQueue->vqHeader_->aql_slot_num / (DeviceQueueMaskSize * maskGroups_));
+      signalAddr, loopStart,
+      gpuDefQueue->vqHeader_->aql_slot_num / (DeviceQueueMaskSize * maskGroups_));
   // Note: Device enqueue can't have extra commands after INDIRECT_BUFFER call.
   // Thus TS command for profiling has to follow in the next CB.
   constexpr bool ForceSubmitFirst = true;
@@ -2173,10 +2151,10 @@ void VirtualGPU::PostDeviceEnqueue(
     // Add the termination handshake to the host queue
     eventBegin(MainEngine);
     iCmd()->CmdVirtualQueueHandshake(vmParentWrap + offsetof(AmdAqlWrap, state), AQL_WRAP_DONE,
-      vmParentWrap + offsetof(AmdAqlWrap, child_counter),
-      signalAddr, dev().settings().useDeviceQueue_);
+                                     vmParentWrap + offsetof(AmdAqlWrap, child_counter), signalAddr,
+                                     dev().settings().useDeviceQueue_);
     if (id != gpuEvent->id_) {
-        LogError("Something is wrong. ID mismatch!\n");
+      LogError("Something is wrong. ID mismatch!\n");
     }
     eventEnd(MainEngine, *gpuEvent);
   }
@@ -2193,7 +2171,8 @@ void VirtualGPU::submitKernel(amd::NDRangeKernelCommand& vcmd) {
   profilingBegin(vcmd);
 
   // Submit kernel to HW
-  if (!submitKernelInternal(vcmd.sizes(), vcmd.kernel(), vcmd.parameters(), false, &vcmd.event(), vcmd.sharedMemBytes())) {
+  if (!submitKernelInternal(vcmd.sizes(), vcmd.kernel(), vcmd.parameters(), false, &vcmd.event(),
+                            vcmd.sharedMemBytes())) {
     vcmd.setStatus(CL_INVALID_OPERATION);
   }
 
@@ -2203,10 +2182,9 @@ void VirtualGPU::submitKernel(amd::NDRangeKernelCommand& vcmd) {
 // ================================================================================================
 bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const amd::Kernel& kernel,
                                       const_address parameters, bool nativeMem,
-                                      amd::Event* enqueueEvent, uint32_t sharedMemBytes)
-{
-  size_t newOffset[3] = { 0, 0, 0 };
-  size_t newGlobalSize[3] = { 0, 0, 0 };
+                                      amd::Event* enqueueEvent, uint32_t sharedMemBytes) {
+  size_t newOffset[3] = {0, 0, 0};
+  size_t newGlobalSize[3] = {0, 0, 0};
 
   int dim = -1;
   int iteration = 1;
@@ -2221,17 +2199,17 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
 
   // If RGP capturing is enabled, then start SQTT trace
   if (rgpCaptureEna()) {
-    size_t newLocalSize[3] = { 1, 1, 1 };
+    size_t newLocalSize[3] = {1, 1, 1};
     for (uint i = 0; i < sizes.dimensions(); i++) {
       if (sizes.local()[i] != 0) {
         newLocalSize[i] = sizes.local()[i];
       }
     }
-    dev().rgpCaptureMgr()->PreDispatch(this, hsaKernel,
-      // Report global size in workgroups, since that's the RGP trace semantics
-      newGlobalSize[0] / newLocalSize[0],
-      newGlobalSize[1] / newLocalSize[1],
-      newGlobalSize[2] / newLocalSize[2]);
+    dev().rgpCaptureMgr()->PreDispatch(
+        this, hsaKernel,
+        // Report global size in workgroups, since that's the RGP trace semantics
+        newGlobalSize[0] / newLocalSize[0], newGlobalSize[1] / newLocalSize[1],
+        newGlobalSize[2] / newLocalSize[2]);
   }
 
   bool printfEnabled = (hsaKernel.printfInfo().size() > 0) ? true : false;
@@ -2257,8 +2235,8 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
 
   // Check memory dependency and SVM objects
   if (!processMemObjectsHSA(kernel, parameters, nativeMem, ldsSize)) {
-      LogError("Wrong memory objects!");
-      return false;
+    LogError("Wrong memory objects!");
+    return false;
   }
   bool needFlush = false;
   // Avoid flushing when PerfCounter is enabled, to make sure PerfStart/dispatch/PerfEnd
@@ -2305,15 +2283,14 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
       // an extra loop is required.
       const amd::KernelParameters& kernelParams = kernel.parameters();
       amd::Memory* const* memories =
-        reinterpret_cast<amd::Memory* const*>(parameters + kernelParams.memoryObjOffset());
+          reinterpret_cast<amd::Memory* const*>(parameters + kernelParams.memoryObjOffset());
       for (uint32_t i = 0; i < kernel.signature().numMemories(); ++i) {
         if (nativeMem) {
           Memory* gpuMem = reinterpret_cast<Memory* const*>(memories)[i];
           if (gpuMem != nullptr) {
             gpuMem->setBusy(*this, gpuEvent);
           }
-        }
-        else {
+        } else {
           amd::Memory* mem = memories[i];
           if (mem != nullptr) {
             dev().getGpuMemory(mem)->setBusy(*this, gpuEvent);
@@ -2325,7 +2302,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
     uint64_t vmParentWrap = 0;
     // Program the kernel arguments for the GPU execution
     hsa_kernel_dispatch_packet_t* aqlPkt = hsaKernel.loadArguments(
-      *this, kernel, tmpSizes, parameters, ldsSize + sharedMemBytes, vmDefQueue, &vmParentWrap);
+        *this, kernel, tmpSizes, parameters, ldsSize + sharedMemBytes, vmDefQueue, &vmParentWrap);
     if (nullptr == aqlPkt) {
       LogError("Couldn't load kernel arguments");
       return false;
@@ -2348,8 +2325,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
     }
     dispatchParam.pCpuAqlCode = hsaKernel.cpuAqlCode();
     dispatchParam.hsaQueueVa = hsaQueueMem_->vmAddress();
-    dispatchParam.wavesPerSh = (enqueueEvent != nullptr) ?
-      enqueueEvent->profilingInfo().waves_ : 0;
+    dispatchParam.wavesPerSh = (enqueueEvent != nullptr) ? enqueueEvent->profilingInfo().waves_ : 0;
     dispatchParam.useAtc = dev().settings().svmFineGrainSystem_ ? true : false;
     dispatchParam.workitemPrivateSegmentSize = hsaKernel.spillSegSize();
     dispatchParam.kernargSegmentSize = hsaKernel.argsBufferSize();
@@ -2660,7 +2636,6 @@ void VirtualGPU::submitSignal(amd::SignalCommand& vcmd) {
     eventEnd(MainEngine, gpuEvent);
 
   } else if (vcmd.type() == CL_COMMAND_WRITE_SIGNAL_AMD) {
-
     EngineType activeEngineID = engineID_;
     engineID_ = static_cast<EngineType>(pGpuMemory->getGpuEvent(*this)->engineId_);
 
@@ -2669,8 +2644,8 @@ void VirtualGPU::submitSignal(amd::SignalCommand& vcmd) {
     addBarrier(RgpSqqtBarrierReason::SignalSubmit, FlushL2);
     // Workarounds: We had systems where an extra delay was necessary.
     {
-        // Flush CB associated with the DGMA buffer
-        isDone(pGpuMemory->getGpuEvent(*this));
+      // Flush CB associated with the DGMA buffer
+      isDone(pGpuMemory->getGpuEvent(*this));
     }
 
     eventBegin(engineID_);
@@ -2711,10 +2686,11 @@ void VirtualGPU::submitMakeBuffersResident(amd::MakeBuffersResidentCommand& vcmd
     pGpuMems[i] = pGpuMemory->iMem();
   }
 
-  dev().iDev()->AddGpuMemoryReferences(numObjects, pGpuMemRef, queues_[MainEngine]->iQueue_, Pal::GpuMemoryRefCantTrim);
+  dev().iDev()->AddGpuMemoryReferences(numObjects, pGpuMemRef, queues_[MainEngine]->iQueue_,
+                                       Pal::GpuMemoryRefCantTrim);
   dev().iDev()->InitBusAddressableGpuMemory(queues_[MainEngine]->iQueue_, numObjects, pGpuMems);
   if (numObjects != 0) {
-      dev().iDev()->RemoveGpuMemoryReferences(numObjects, &pGpuMems[0], queues_[MainEngine]->iQueue_);
+    dev().iDev()->RemoveGpuMemoryReferences(numObjects, &pGpuMems[0], queues_[MainEngine]->iQueue_);
   }
 
   for (uint i = 0; i < numObjects; i++) {
@@ -3104,8 +3080,8 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p
         break;
     }
     // get svm non arugment information
-    void* const* svmPtrArray = reinterpret_cast<void* const*>(
-      params + kernelParams.getExecInfoOffset());
+    void* const* svmPtrArray =
+        reinterpret_cast<void* const*>(params + kernelParams.getExecInfoOffset());
     for (size_t i = 0; i < count; i++) {
       amd::Memory* memory = amd::MemObjMap::FindMemObj(svmPtrArray[i]);
       if (nullptr == memory) {
@@ -3149,8 +3125,7 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p
   bool srdResource = false;
   amd::Memory* const* memories =
       reinterpret_cast<amd::Memory* const*>(params + kernelParams.memoryObjOffset());
-  const HSAILKernel& hsaKernel =
-      static_cast<const HSAILKernel&>(*(kernel.getDeviceKernel(dev())));
+  const HSAILKernel& hsaKernel = static_cast<const HSAILKernel&>(*(kernel.getDeviceKernel(dev())));
   const amd::KernelSignature& signature = kernel.signature();
   ldsAddress = hsaKernel.ldsSize();
 
@@ -3225,10 +3200,10 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p
           addVmMemory(gpuMem);
           const void* globalAddress = *reinterpret_cast<const void* const*>(params + desc.offset_);
           LogPrintfInfo("!\targ%d: %s %s = ptr:%p obj:[%p-%p] threadId : %zx\n", index,
-            desc.typeName_.c_str(), desc.name_.c_str(),
-            globalAddress, reinterpret_cast<void*>(gpuMem->vmAddress()),
-            reinterpret_cast<void*>(gpuMem->vmAddress() + gpuMem->size()),
-            std::this_thread::get_id());
+                        desc.typeName_.c_str(), desc.name_.c_str(), globalAddress,
+                        reinterpret_cast<void*>(gpuMem->vmAddress()),
+                        reinterpret_cast<void*>(gpuMem->vmAddress() + gpuMem->size()),
+                        std::this_thread::get_id());
 
           //! Check if compiler expects read/write.
           //! Note: SVM with subbuffers has an issue with tracking.
@@ -3255,30 +3230,28 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p
             }
             if (gpuMem->desc().isDoppTexture_) {
               addDoppRef(gpuMem, kernel.parameters().getExecNewVcop(),
-                kernel.parameters().getExecPfpaVcop());
+                         kernel.parameters().getExecPfpaVcop());
             }
           }
         }
       }
-    }
-    else if (desc.type_ == T_VOID) {
+    } else if (desc.type_ == T_VOID) {
       if (desc.info_.oclObject_ == amd::KernelParameterDescriptor::ReferenceObject) {
         // Copy the current structure into CB1
-        size_t gpuPtr = static_cast<size_t>(cb(1)->UploadDataToHw(
-          params + desc.offset_, desc.size_));
+        size_t gpuPtr =
+            static_cast<size_t>(cb(1)->UploadDataToHw(params + desc.offset_, desc.size_));
         // Then use a pointer in aqlArgBuffer to CB1
         const auto it = hsaKernel.patch().find(desc.offset_);
         // Patch the GPU VA address in the original arguments
         WriteAqlArgAt(const_cast<address>(params), &gpuPtr, sizeof(size_t), it->second);
         addVmMemory(cb(1)->ActiveMemory());
       }
-    }
-    else if (desc.type_ == T_SAMPLER) {
+    } else if (desc.type_ == T_SAMPLER) {
       srdResource = true;
     } else if (desc.type_ == T_QUEUE) {
       uint32_t index = desc.info_.arrayIndex_;
-      const amd::DeviceQueue* queue = reinterpret_cast<amd::DeviceQueue* const*>(
-        params + kernelParams.queueObjOffset())[index];
+      const amd::DeviceQueue* queue =
+          reinterpret_cast<amd::DeviceQueue* const*>(params + kernelParams.queueObjOffset())[index];
       VirtualGPU* gpuQueue = static_cast<VirtualGPU*>(queue->vDev());
       uint64_t vmQueue;
       if (dev().settings().useDeviceQueue_) {
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp
index 13c83b3796..9e557e1f03 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp
@@ -51,17 +51,18 @@ class VirtualGPU : public device::VirtualDevice {
     Queue(const Queue&) = delete;
     Queue& operator=(const Queue&) = delete;
 
-    static Queue* Create(const VirtualGPU& gpu,                //!< OCL virtual GPU object
-                         Pal::QueueType queueType,             //!< PAL queue type
-                         uint engineIdx,                       //!< Select particular engine index
-                         Pal::ICmdAllocator* cmdAlloc,         //!< PAL CMD buffer allocator
-                         uint rtCU,                            //!< The number of reserved CUs
-                         amd::CommandQueue::Priority priority, //!< Queue priority
-                         uint64_t residency_limit,             //!< Enables residency limit
-                         uint max_command_buffers              //!< Number of allocated command buffers
-                         );
+    static Queue* Create(const VirtualGPU& gpu,                 //!< OCL virtual GPU object
+                         Pal::QueueType queueType,              //!< PAL queue type
+                         uint engineIdx,                        //!< Select particular engine index
+                         Pal::ICmdAllocator* cmdAlloc,          //!< PAL CMD buffer allocator
+                         uint rtCU,                             //!< The number of reserved CUs
+                         amd::CommandQueue::Priority priority,  //!< Queue priority
+                         uint64_t residency_limit,              //!< Enables residency limit
+                         uint max_command_buffers  //!< Number of allocated command buffers
+    );
 
-    Queue(const VirtualGPU& gpu, Pal::IDevice* iDev, uint64_t residency_limit, uint max_command_buffers)
+    Queue(const VirtualGPU& gpu, Pal::IDevice* iDev, uint64_t residency_limit,
+          uint max_command_buffers)
         : iQueue_(nullptr),
           iCmdBuffs_(max_command_buffers, nullptr),
           iCmdFences_(max_command_buffers, nullptr),
@@ -75,8 +76,7 @@ class VirtualGPU : public device::VirtualDevice {
           vlAlloc_(64 * Ki),
           residency_size_(0),
           residency_limit_(residency_limit),
-          max_command_buffers_(max_command_buffers)
-    {
+          max_command_buffers_(max_command_buffers) {
       vlAlloc_.Init();
     }
 
@@ -100,8 +100,7 @@ class VirtualGPU : public device::VirtualDevice {
     Pal::Result UpdateAppPowerProfile();
 
     // ibReuse forces event wait without polling, to make sure event occured
-    template <bool ibReuse>
-    bool waifForFence(uint cbId) const {
+    template <bool ibReuse> bool waifForFence(uint cbId) const {
       Pal::Result result = Pal::Result::Success;
       uint64_t start;
       uint64_t end;
@@ -138,8 +137,7 @@ class VirtualGPU : public device::VirtualDevice {
 
     //! Flushes the current command buffer to HW
     //! Returns ID associated with the submission
-    template <bool avoidBarrierSubmit = false>
-    uint submit(bool forceFlush);
+    template <bool avoidBarrierSubmit = false> uint submit(bool forceFlush);
 
     bool flush();
 
@@ -151,28 +149,28 @@ class VirtualGPU : public device::VirtualDevice {
 
     uint cmdBufId() const { return cmdBufIdCurrent_; }
 
-    Pal::IQueue* iQueue_;                        //!< PAL queue object
-    std::vector<Pal::ICmdBuffer*> iCmdBuffs_;    //!< PAL command buffers
-    std::vector<Pal::IFence*> iCmdFences_;       //!< PAL fences, associated with CMD
-    const amd::Kernel* last_kernel_;             //!< Last submitted kernel
+    Pal::IQueue* iQueue_;                      //!< PAL queue object
+    std::vector<Pal::ICmdBuffer*> iCmdBuffs_;  //!< PAL command buffers
+    std::vector<Pal::IFence*> iCmdFences_;     //!< PAL fences, associated with CMD
+    const amd::Kernel* last_kernel_;           //!< Last submitted kernel
 
-  private:
+   private:
     void DumpMemoryReferences() const;
-    const VirtualGPU& gpu_; //!< OCL virtual GPU object
-    Pal::IDevice* iDev_;    //!< PAL device
-    uint cmdBufIdSlot_;     //!< Command buffer ID slot for submissions
-    uint cmdBufIdCurrent_;  //!< Current global command buffer ID
-    uint cmbBufIdRetired_;  //!< The last retired command buffer ID
-    uint cmdCnt_;           //!< Counter of commands
+    const VirtualGPU& gpu_;  //!< OCL virtual GPU object
+    Pal::IDevice* iDev_;     //!< PAL device
+    uint cmdBufIdSlot_;      //!< Command buffer ID slot for submissions
+    uint cmdBufIdCurrent_;   //!< Current global command buffer ID
+    uint cmbBufIdRetired_;   //!< The last retired command buffer ID
+    uint cmdCnt_;            //!< Counter of commands
     std::unordered_map<GpuMemoryReference*, uint> memReferences_;
-    Util::VirtualLinearAllocator    vlAlloc_;
-    std::vector<Pal::GpuMemoryRef>  palMemRefs_;
-    std::vector<Pal::IGpuMemory*>   palMems_;
-    std::vector<Pal::DoppRef>       palDoppRefs_;
-    std::set<Pal::IGpuMemory*>      sdiReferences_;
-    std::vector<const Pal::IGpuMemory*>   palSdiRefs_;
-    uint64_t  residency_size_;  //!< Resource residency size
-    uint64_t  residency_limit_; //!< Enables residency limit
+    Util::VirtualLinearAllocator vlAlloc_;
+    std::vector<Pal::GpuMemoryRef> palMemRefs_;
+    std::vector<Pal::IGpuMemory*> palMems_;
+    std::vector<Pal::DoppRef> palDoppRefs_;
+    std::set<Pal::IGpuMemory*> sdiReferences_;
+    std::vector<const Pal::IGpuMemory*> palSdiRefs_;
+    uint64_t residency_size_;   //!< Resource residency size
+    uint64_t residency_limit_;  //!< Enables residency limit
     uint max_command_buffers_;
   };
 
@@ -185,14 +183,14 @@ class VirtualGPU : public device::VirtualDevice {
     CommandBatch(amd::Command* head,      //!< Command batch head
                  const GpuEvent* events,  //!< HW events on all engines
                  TimeStamp* lastTS        //!< Last TS in command batch
-                 ) {
+    ) {
       init(head, events, lastTS);
     }
 
     void init(amd::Command* head,      //!< Command batch head
               const GpuEvent* events,  //!< HW events on all engines
               TimeStamp* lastTS        //!< Last TS in command batch
-              ) {
+    ) {
       head_ = head;
       lastTS_ = lastTS;
       memcpy(&events_, events, AllEngines * sizeof(GpuEvent));
@@ -202,11 +200,11 @@ class VirtualGPU : public device::VirtualDevice {
   //! The virtual GPU states
   union State {
     struct {
-      uint profiling_          : 1;     //!< Profiling is enabled
-      uint forceWait_          : 1;     //!< Forces wait in flush()
-      uint profileEnabled_     : 1;     //!< Profiling is enabled for WaveLimiter
-      uint perfCounterEnabled_ : 1;     //!< PerfCounter is enabled
-      uint rgpCaptureEnabled_  : 1;     //!< RGP capture is enabled in the runtime
+      uint profiling_ : 1;           //!< Profiling is enabled
+      uint forceWait_ : 1;           //!< Forces wait in flush()
+      uint profileEnabled_ : 1;      //!< Profiling is enabled for WaveLimiter
+      uint perfCounterEnabled_ : 1;  //!< PerfCounter is enabled
+      uint rgpCaptureEnabled_ : 1;   //!< RGP capture is enabled in the runtime
     };
     uint value_;
     State() : value_(0) {}
@@ -259,13 +257,13 @@ class VirtualGPU : public device::VirtualDevice {
     void findSplitSize(const Device& dev,  //!< GPU device object
                        uint64_t threads,   //!< Total number of execution threads
                        uint instructions   //!< Number of ALU instructions
-                       );
+    );
 
     // Returns TRUE if DMA command buffer is ready for a flush
     bool isCbReady(VirtualGPU& gpu,   //!< Virtual GPU object
                    uint64_t threads,  //!< Total number of execution threads
                    uint instructions  //!< Number of ALU instructions
-                   );
+    );
 
     // Returns dispatch split size
     uint dispatchSplitSize() const { return dispatchSplitSize_; }
@@ -301,7 +299,7 @@ class VirtualGPU : public device::VirtualDevice {
       bool nativeMem = true,               //!< Native memory objects
       amd::Event* enqueueEvent = nullptr,  //!< Event provided in the enqueue kernel command
       uint32_t sharedMemBytes = 0          //!< Shared memory size
-      );
+  );
   void submitNativeFn(amd::NativeFnCommand& vcmd);
   void submitFillMemory(amd::FillMemoryCommand& vcmd);
   void submitMigrateMemObjects(amd::MigrateMemObjectsCommand& cmd);
@@ -331,20 +329,20 @@ class VirtualGPU : public device::VirtualDevice {
   //! Set the last known GPU event
   void setGpuEvent(GpuEvent gpuEvent,  //!< GPU event for tracking
                    bool flush = false  //!< TRUE if flush is required
-                   );
+  );
 
   //! Flush DMA buffer on the specified engine
   void flushDMA(uint engineID  //!< Engine ID for DMA flush
-                );
+  );
 
   //! Wait for all engines on this Virtual GPU
   //! Returns TRUE if CPU didn't wait for GPU
   bool waitAllEngines(CommandBatch* cb = nullptr  //!< Command batch
-                      );
+  );
 
   //! Waits for the latest GPU event with a lock to prevent multiple entries
   void waitEventLock(CommandBatch* cb  //!< Command batch
-                     );
+  );
 
   //! Returns a resource associated with the constant buffer
   const ConstantBuffer* cb(uint idx) const { return constBufs_[idx]; }
@@ -355,7 +353,7 @@ class VirtualGPU : public device::VirtualDevice {
   //! Start the command profiling
   void profilingBegin(amd::Command& command,     //!< Command queue object
                       bool drmProfiling = false  //!< Measure DRM time
-                      );
+  );
 
   //! End the command profiling
   void profilingEnd(amd::Command& command);
@@ -363,11 +361,11 @@ class VirtualGPU : public device::VirtualDevice {
   //! Collect the profiling results
   bool profilingCollectResults(CommandBatch* cb,               //!< Command batch
                                const amd::Event* waitingEvent  //!< Waiting event
-                               );
+  );
 
   //! Adds a memory handle into the GSL memory array for Virtual Heap
   inline void addVmMemory(const Memory* memory  //!< GPU memory object
-                          );
+  );
 
   //! Adds the last submitted kernel to the queue for tracking a possible hang
   inline void AddKernel(const amd::Kernel& kernel  //!< AMD kernel object
@@ -377,7 +375,7 @@ class VirtualGPU : public device::VirtualDevice {
   void addDoppRef(const Memory* memory,  //!< GPU memory object
                   bool lastDoopCmd,      //!< is the last submission for the pre-present primary
                   bool pfpaDoppCmd       //!< is a submission for the pre-present primary
-                  );
+  );
 
   //! Return xfer buffer for staging operations
   XferBuffer& xferWrite() { return writeBuffer_; }
@@ -429,7 +427,7 @@ class VirtualGPU : public device::VirtualDevice {
 
   //! Returns TRUE if virtual queue was successfully allocatted
   bool createVirtualQueue(uint deviceQueueSize  //!< Device queue size
-                          );
+  );
 
   EngineType engineID_;  //!< Engine ID for this VirtualGPU
 
@@ -447,7 +445,8 @@ class VirtualGPU : public device::VirtualDevice {
   //! Returns queue, associated with VirtualGPU
   Queue& queue(EngineType id) const { return *queues_[id]; }
 
-  void addBarrier(RgpSqqtBarrierReason reason = RgpSqqtBarrierReason::Unknown, bool flushL2 = false) const {
+  void addBarrier(RgpSqqtBarrierReason reason = RgpSqqtBarrierReason::Unknown,
+                  bool flushL2 = false) const {
     Pal::BarrierInfo barrier = {};
     barrier.pipePointWaitCount = 1;
     Pal::HwPipePoint point = Pal::HwPipePostCs;
@@ -508,7 +507,7 @@ class VirtualGPU : public device::VirtualDevice {
   //! Returns TRUE if SDMA requires overlap synchronizaiton
   bool validateSdmaOverlap(const Resource& src,  //!< Source resource for SDMA transfer
                            const Resource& dst   //!< Destination resource for SDMA transfer
-                           );
+  );
 
   //! Checks if RGP capture is enabled
   bool rgpCaptureEna() const { return state_.rgpCaptureEnabled_; }
@@ -519,7 +518,7 @@ class VirtualGPU : public device::VirtualDevice {
   //! Creates buffer object from image
   amd::Memory* createBufferFromImage(
       amd::Memory& amdImage  //! The parent image object(untiled images only)
-      );
+  );
 
  private:
   struct MemoryRange {
@@ -537,14 +536,14 @@ class VirtualGPU : public device::VirtualDevice {
   //! Awaits a command batch with a waiting event
   bool awaitCompletion(CommandBatch* cb,                         //!< Command batch for to wait
                        const amd::Event* waitingEvent = nullptr  //!< A waiting event
-                       );
+  );
 
   //! Detects memory dependency for HSAIL kernels and flushes caches
   bool processMemObjectsHSA(const amd::Kernel& kernel,  //!< AMD kernel object for execution
                             const_address params,       //!< Pointer to the param's store
                             bool nativeMem,             //!< Native memory objects
-                            size_t& ldsAddess         //!< Returns LDS size, used in the kernel
-                            );
+                            size_t& ldsAddess           //!< Returns LDS size, used in the kernel
+  );
 
   //! Common function for fill memory used by both svm Fill and non-svm fill
   bool fillMemory(cl_command_type type,        //!< the command type
@@ -553,7 +552,7 @@ class VirtualGPU : public device::VirtualDevice {
                   size_t patternSize,          //!< pattern size
                   const amd::Coord3D& origin,  //!< memory origin
                   const amd::Coord3D& size     //!< memory size for filling
-                  );
+  );
 
   bool copyMemory(cl_command_type type,            //!< the command type
                   amd::Memory& srcMem,             //!< source memory object
@@ -564,35 +563,36 @@ class VirtualGPU : public device::VirtualDevice {
                   const amd::Coord3D& size,        //!< copy size
                   const amd::BufferRect& srcRect,  //!< region of source for copy
                   const amd::BufferRect& dstRect   //!< region of destination for copy
-                  );
+  );
 
   void buildKernelInfo(const HSAILKernel& hsaKernel,          //!< hsa kernel
                        hsa_kernel_dispatch_packet_t* aqlPkt,  //!< aql packet for dispatch
                        HwDbgKernelInfo& kernelInfo,           //!< kernel info for the dispatch
                        amd::Event* enqueueEvent  //!< Event provided in the enqueue kernel command
-                       );
+  );
 
   void assignDebugTrapHandler(const DebugToolInfo& dbgSetting,  //!< debug settings
                               HwDbgKernelInfo& kernelInfo       //!< kernel info for the dispatch
-                              );
+  );
 
   void PrintChildren(const HSAILKernel& hsaKernel,  //!< The parent HSAIL kernel
                      VirtualGPU* gpuDefQueue        //!< Device queue for children execution
-                     );
+  );
 
-  bool PreDeviceEnqueue(const amd::Kernel& kernel,    //!< Parent amd kernel object
-                        const HSAILKernel& hsaKernel, //!< Parent HSAIL object
-                        VirtualGPU** gpuDefQueue,     //!< [Return] GPU default queue
-                        uint64_t* vmDefQueue          //!< [Return] VM handle to the virtual queue
-                        );
+  bool PreDeviceEnqueue(const amd::Kernel& kernel,     //!< Parent amd kernel object
+                        const HSAILKernel& hsaKernel,  //!< Parent HSAIL object
+                        VirtualGPU** gpuDefQueue,      //!< [Return] GPU default queue
+                        uint64_t* vmDefQueue           //!< [Return] VM handle to the virtual queue
+  );
 
-  void PostDeviceEnqueue(const amd::Kernel& kernel,    //!< Parent amd kernel object
-                         const HSAILKernel& hsaKernel, //!< Parent HSAIL object
-                         VirtualGPU* gpuDefQueue,      //!< GPU default queue
-                         uint64_t vmDefQueue,          //!< VM handle to the virtual queue
-                         uint64_t vmParentWrap,        //!< VM handle to the wrapped AQL packet location
-                         GpuEvent* gpuEvent            //!< [Return] GPU event associated with the device enqueue
-                         );
+  void PostDeviceEnqueue(
+      const amd::Kernel& kernel,     //!< Parent amd kernel object
+      const HSAILKernel& hsaKernel,  //!< Parent HSAIL object
+      VirtualGPU* gpuDefQueue,       //!< GPU default queue
+      uint64_t vmDefQueue,           //!< VM handle to the virtual queue
+      uint64_t vmParentWrap,         //!< VM handle to the wrapped AQL packet location
+      GpuEvent* gpuEvent             //!< [Return] GPU event associated with the device enqueue
+  );
 
   Device& gpuDevice_;       //!< physical GPU device
   amd::Monitor execution_;  //!< Lock to serialise access to all device objects
@@ -605,11 +605,11 @@ class VirtualGPU : public device::VirtualDevice {
 
   DmaFlushMgmt dmaFlushMgmt_;  //!< DMA flush management
 
-  std::vector<amd::Memory*> pinnedMems_;   //!< Pinned memory list
+  std::vector<amd::Memory*> pinnedMems_;  //!< Pinned memory list
 
-  ManagedBuffer managedBuffer_; //!< Managed write buffer
-  constbufs_t   constBufs_;     //!< constant buffers
-  XferBuffer    writeBuffer_;   //!< Transfer/staging buffer for uploads
+  ManagedBuffer managedBuffer_;  //!< Managed write buffer
+  constbufs_t constBufs_;        //!< constant buffers
+  XferBuffer writeBuffer_;       //!< Transfer/staging buffer for uploads
 
   typedef std::queue<CommandBatch*> CommandBatchQueue;
   CommandBatchQueue cbQueue_;      //!< Queue of command batches
@@ -617,12 +617,12 @@ class VirtualGPU : public device::VirtualDevice {
 
   uint hwRing_;  //!< HW ring used on this virtual device
 
-  State state_;          //!< virtual GPU current state
+  State state_;                  //!< virtual GPU current state
   GpuEvent events_[AllEngines];  //!< Last known GPU events
 
-  uint64_t readjustTimeGPU_;   //!< Readjust time between GPU and CPU timestamps
-  TimeStamp* lastTS_;          //!< Last timestamp executed on Virtual GPU
-  TimeStamp* profileTs_;       //!< current profiling timestamp for command
+  uint64_t readjustTimeGPU_;  //!< Readjust time between GPU and CPU timestamps
+  TimeStamp* lastTS_;         //!< Last timestamp executed on Virtual GPU
+  TimeStamp* profileTs_;      //!< current profiling timestamp for command
 
   AmdVQueueHeader* vqHeader_;  //!< Sysmem copy for virtual queue header
   Memory* virtualQueue_;       //!< Virtual device queue
@@ -645,8 +645,7 @@ inline void VirtualGPU::AddKernel(const amd::Kernel& kernel) const {
   queues_[MainEngine]->last_kernel_ = &kernel;
 }
 
-template <bool avoidBarrierSubmit>
-uint VirtualGPU::Queue::submit(bool forceFlush) {
+template <bool avoidBarrierSubmit> uint VirtualGPU::Queue::submit(bool forceFlush) {
   cmdCnt_++;
   uint id = cmdBufIdCurrent_;
   bool flushCmd = ((cmdCnt_ > MaxCommands) || forceFlush) && !avoidBarrierSubmit;
@@ -659,32 +658,30 @@ uint VirtualGPU::Queue::submit(bool forceFlush) {
 }
 
 template <typename T>
-inline void WriteAqlArgAt(
-  unsigned char* dst,   //!< The write pointer to the buffer
-  const T* src,         //!< The source pointer
-  uint size,            //!< The size in bytes to copy
-  size_t offset         //!< The alignment to follow while writing to the buffer
+inline void WriteAqlArgAt(unsigned char* dst,  //!< The write pointer to the buffer
+                          const T* src,        //!< The source pointer
+                          uint size,           //!< The size in bytes to copy
+                          size_t offset  //!< Byte offset from dst at which the argument is written
 ) {
   memcpy(dst + offset, src, size);
 }
 
 template <>
-inline void WriteAqlArgAt(
-  unsigned char* dst,   //!< The write pointer to the buffer
-  const uint32_t* src,  //!< The source pointer
-  uint size,            //!< The size in bytes to copy
-  size_t offset         //!< The alignment to follow while writing to the buffer
+inline void WriteAqlArgAt(unsigned char* dst,   //!< The write pointer to the buffer
+                          const uint32_t* src,  //!< The source pointer
+                          uint size,            //!< The size in bytes to copy
+                          size_t offset  //!< Byte offset from dst at which the argument is written
 ) {
   *(reinterpret_cast<uint32_t*>(dst + offset)) = *src;
 }
 
 template <>
-inline void WriteAqlArgAt(
-  unsigned char* dst,   //!< The write pointer to the buffer
-  const uint64_t* src,  //!< The source pointer
-  uint size,            //!< The size in bytes to copy
-  size_t offset         //!< The alignment to follow while writing to the buffer
+inline void WriteAqlArgAt(unsigned char* dst,   //!< The write pointer to the buffer
+                          const uint64_t* src,  //!< The source pointer
+                          uint size,            //!< The size in bytes to copy
+                          size_t offset  //!< Byte offset from dst at which the argument is written
 ) {
   *(reinterpret_cast<uint64_t*>(dst + offset)) = *src;
 }
-/*@}*/} // namespace pal
+/*@}*/
+}  // namespace pal