Migrate amdgpu-windows-interop to rocm-systems (#808)

2025-09-05 10:32:44 -04:00
Commit 5ca7af2d30
@@ -0,0 +1,833 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  pal.h
+ * @brief Common include for the Platform Abstraction Library (PAL) interface.  Defines common types, enums, etc.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "palFormat.h"
+#include "palSysUtil.h"
+
+// Forward declarations of global types (must be done outside of Pal namespace).
+// HMONITOR__ and HWND__ are the pointee types of the OsDisplayHandle and OsWindowHandle typedefs that this header
+// declares for _WIN32 builds.
+#if (PAL_KMT_BUILD) && !defined(__unix__)
+struct HMONITOR__;
+struct HWND__;
+#endif
+
+// NOTE(review): _SECURITY_ATTRIBUTES is not referenced in the visible part of this header; presumably it is consumed
+// by other PAL interface headers - confirm before removing.
+#if PAL_KMT_BUILD
+struct _SECURITY_ATTRIBUTES;
+#endif
+
+/// Library-wide namespace encapsulating all PAL entities.
+namespace Pal
+{
+
+// The PAL core re-exports the utility companion's fundamental types so that clients can spell them as Pal::<type>.
+using int8    = Util::int8;     ///< 8-bit integer.
+using int16   = Util::int16;    ///< 16-bit integer.
+using int32   = Util::int32;    ///< 32-bit integer.
+using int64   = Util::int64;    ///< 64-bit integer.
+using uint8   = Util::uint8;    ///< Unsigned 8-bit integer.
+using uint16  = Util::uint16;   ///< Unsigned 16-bit integer.
+using uint32  = Util::uint32;   ///< Unsigned 32-bit integer.
+using uint64  = Util::uint64;   ///< Unsigned 64-bit integer.
+using gpusize = Util::gpusize;  ///< Used to specify GPU addresses and sizes of GPU allocations.  This differs from
+                                ///  size_t since the GPU still uses 64-bit addresses on a 32-bit OS.
+using Result  = Util::Result;   ///< The PAL core and utility companion share the same result codes for convenience.
+
+using Rational = Util::Rational; ///< A ratio of two unsigned integers.
+
+#if defined(_WIN32)
+using OsDisplayHandle  = HMONITOR__*;  ///< OsDisplayHandle corresponds to an HMONITOR on Windows.
+using OsWindowHandle   = HWND__*;      ///< OsWindowHandle corresponds to an HWND on Windows.
+using OsExternalHandle = void*;        ///< OsExternalHandle corresponds to a generic HANDLE on Windows.
+
+using OsVideoSessionHandle = uint32;   ///< OsVideoSessionHandle corresponds to a video session handle on Vulkan.
+
+/// Value representing a null or invalid window handle.
+constexpr OsWindowHandle NullWindowHandle = nullptr;
+#elif defined(__unix__)
+
+using OsDisplayHandle      = void*;    ///< The Display Handle for Linux except X11 platform
+using OsExternalHandle     = uint32;   ///< OsExternalHandle corresponds to a generic handle on linux
+using OsVideoSessionHandle = uint32;   ///< OsVideoSessionHandle corresponds to a video session handle on linux.
+
+/// Native window handle on Linux: either a window on X-Windows or a surface on Wayland.
+union OsWindowHandle
+{
+    void*  pSurface;  ///< Wayland: the native surface handle is a pointer.
+    uint64 win;       ///< X: the native window handle is a 32-bit integer (widened to 64 bits for storage here).
+};
+
+/// Value representing a null or invalid window handle.
+constexpr OsWindowHandle NullWindowHandle = {nullptr};
+
+// Handles are compared through the pSurface member only; the Linux platform type (X vs. Wayland) is not consulted.
+// NOTE(review): pSurface is only as wide as 'win' when pointers are 64 bits - confirm this is acceptable for any
+// 32-bit build.
+inline bool operator==(const Pal::OsWindowHandle& lhs, const Pal::OsWindowHandle& rhs)
+{
+    return (lhs.pSurface == rhs.pSurface);
+}
+
+inline bool operator!=(const Pal::OsWindowHandle& lhs, const Pal::OsWindowHandle& rhs)
+{
+    return ((lhs == rhs) == false);
+}
+#else
+#error "Unsupported OS platform detected!"
+#endif
+
+#if PAL_CLIENT_EXAMPLE
+using AddrHandle = void*;       ///< Corresponds to an ADDR_HANDLE.
+#endif
+
+/// In cases where PAL cannot abstract a Windows VidPnSourceId, this represents an invalid value.  All bits are set
+/// because zero is a valid value.
+constexpr uint32 InvalidVidPnSourceId     = ~0u;
+
+/// Maximum number of vertex buffers per pipeline.
+constexpr uint32 MaxVertexBuffers         = 32;
+/// Maximum number of color targets.
+constexpr uint32 MaxColorTargets          = 8;
+/// Maximum number of stream output target buffers.
+constexpr uint32 MaxStreamOutTargets      = 4;
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 936
+/// Maximum number of descriptor sets.  Only declared for client interface versions below 936.
+constexpr uint32 MaxDescriptorSets        = 2;
+#endif
+/// Maximum number of MSAA samples supported by the rasterizer.
+constexpr uint32 MaxMsaaRasterizerSamples = 16;
+/// Maximum number of engines for a particular engine type.
+constexpr uint32 MaxAvailableEngines      = 12;
+/// Maximum number of format planes.
+constexpr uint32 MaxNumPlanes             = 3;
+
+/// Default Hash for PAL internal pipelines.
+constexpr uint64 InternalApiPsoHash       = UINT64_MAX;
+
+/// Specifies a category of GPU engine.  Each category corresponds directly to a hardware engine. There may be multiple
+/// engines available for a given type; the available engines on a particular GPU can be queried via
+/// Device::GetProperties, returned in DeviceProperties.engineProperties[].
+enum EngineType : uint32
+{
+    /// Corresponds to the graphics hardware engine (a.k.a. graphics ring a.k.a. 3D).
+    EngineTypeUniversal,
+
+    /// Corresponds to asynchronous compute engines (ACE).
+    EngineTypeCompute,
+
+    /// Corresponds to SDMA engines.
+    EngineTypeDma,
+
+    /// Virtual engine that only supports inserting sleeps, used for implementing frame-pacing.
+    EngineTypeTimer,
+
+    /// Number of engine types.  Not itself a valid engine type.
+    EngineTypeCount,
+};
+
+/// Specifies a category of GPU work.  Each queue type only supports specific types of work. Determining which
+/// QueueTypes are supported on which engines can be queried via IDevice::GetProperties, returned in
+/// DeviceProperties.engineProperties[].
+///
+/// The enumerator values double as bit positions for the QueueTypeSupport flags.
+enum QueueType : uint32
+{
+    /// Supports graphics commands (draws), compute commands (dispatches), and copy commands.
+    QueueTypeUniversal,
+
+    /// Supports compute commands (dispatches), and copy commands.
+    QueueTypeCompute,
+
+    /// Supports copy commands.
+    QueueTypeDma,
+
+    /// Virtual engine that only supports inserting sleeps, used for implementing frame pacing.
+    /// This is a software-only queue.
+    QueueTypeTimer,
+
+    /// Number of queue types.  Not itself a valid queue type.
+    QueueTypeCount,
+};
+
+/// Defines flags for describing which queues are supported.  Each flag is a single bit whose position is the
+/// numeric value of the corresponding QueueType enumerator, so flags can be OR'd together into a mask.
+enum QueueTypeSupport : uint32
+{
+    SupportQueueTypeUniversal   = (1 << static_cast<uint32>(QueueTypeUniversal)),  ///< QueueTypeUniversal bit.
+    SupportQueueTypeCompute     = (1 << static_cast<uint32>(QueueTypeCompute)),    ///< QueueTypeCompute bit.
+    SupportQueueTypeDma         = (1 << static_cast<uint32>(QueueTypeDma)),        ///< QueueTypeDma bit.
+    SupportQueueTypeTimer       = (1 << static_cast<uint32>(QueueTypeTimer)),      ///< QueueTypeTimer bit.
+
+};
+
+// Many command buffers break down into multiple command streams targeting internal sub-engines. For example, Universal
+// command buffers build a primary stream (DE) but may also build a second stream for async compute engine (ACE).
+//
+// NOTE: The numeric values of AsyncCompute and ConstantEngine are swapped between the two interface-version branches
+// below, and Pup only exists in the pre-914 branch.  Consequently Count evaluates to 3 for client interface versions
+// >= 914 and to 4 otherwise; do not persist or hard-code these values across interface versions.
+enum class SubEngineType : uint32
+{
+    Primary        = 0, // Subqueue that is the queue itself, rather than an ancillary queue.
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 914
+    AsyncCompute   = 1, // Auxiliary ACE subqueue, together with a primary subqueue forms a "ganged" submit.
+    ConstantEngine = 2, // CP constant update engine that runs in parallel with draw engine.
+                        // Internal usage only.
+#else
+    ConstantEngine = 1, // CP constant update engine that runs in parallel with draw engine.
+    AsyncCompute   = 2, // Auxiliary ACE subqueue, together with a primary subqueue forms a "ganged" submit.
+    Pup            = 3, // Subqueue that is the queue itself but for PUP-style packets, rather than an
+                        // ancillary queue
+#endif
+    Count,              // Number of sub-engine types (one past the last enumerator of the active branch).
+};
+
+/// Defines the execution priority for a queue, specified either at queue creation or via IQueue::SetExecutionPriority()
+/// on platforms that support it.  QueuePriority::Normal corresponds to the default priority.
+///
+/// Note that the numeric values are not ordered by priority (Idle is 1 but ranks below Normal, which is 0).  The
+/// values double as bit positions for the QueuePrioritySupport flags.
+enum class QueuePriority : uint32
+{
+    Normal   =  0,  ///< Normal priority (default).
+    Idle     =  1,  ///< Idle, or low priority (lower than Normal).
+    Medium   =  2,  ///< Medium priority (higher than Normal).
+    High     =  3,  ///< High priority (higher than Normal).
+    Realtime =  4,  ///< Real time priority (higher than Normal).
+    Count           ///< Number of priority levels.  Not itself a valid priority.
+};
+
+/// Defines flags for describing which queue priority levels are supported.  Each flag is a single bit whose position
+/// is the numeric value of the corresponding QueuePriority enumerator, so flags can be OR'd together into a mask.
+enum QueuePrioritySupport : uint32
+{
+    SupportQueuePriorityNormal   = (1 << static_cast<uint32>(QueuePriority::Normal)),   ///< Normal is supported.
+    SupportQueuePriorityIdle     = (1 << static_cast<uint32>(QueuePriority::Idle)),     ///< Idle is supported.
+    SupportQueuePriorityMedium   = (1 << static_cast<uint32>(QueuePriority::Medium)),   ///< Medium is supported.
+    SupportQueuePriorityHigh     = (1 << static_cast<uint32>(QueuePriority::High)),     ///< High is supported.
+    SupportQueuePriorityRealtime = (1 << static_cast<uint32>(QueuePriority::Realtime)), ///< Realtime is supported.
+};
+
+/// Selects one of a few possible memory heaps accessible by a GPU.
+enum GpuHeap : uint32
+{
+    GpuHeapLocal         = 0x0,  ///< Local heap visible to the CPU.
+    GpuHeapInvisible     = 0x1,  ///< Local heap not visible to the CPU.
+    GpuHeapGartUswc      = 0x2,  ///< GPU-accessible uncached system memory.
+    GpuHeapGartCacheable = 0x3,  ///< GPU-accessible cached system memory.
+    GpuHeapCount                 ///< Number of heaps.  Not itself a valid heap.
+};
+
+/// Describes the desired access for a memory allocation.
+enum GpuHeapAccess : uint32
+{
+    GpuHeapAccessExplicit       = 0x0, ///< Memory access is not known. Heaps will be explicitly defined.
+    GpuHeapAccessCpuNoAccess    = 0x1, ///< Memory access from CPU not required.
+    GpuHeapAccessGpuMostly      = 0x2, ///< Memory optimized for reads/writes from GPU and accessible from CPU.
+    GpuHeapAccessCpuReadMostly  = 0x3, ///< Memory optimized for reads from CPU.
+    GpuHeapAccessCpuWriteMostly = 0x4, ///< Memory optimized for writes from CPU.
+    GpuHeapAccessCpuMostly      = 0x5, ///< Memory optimized for read/writes from CPU.
+    GpuHeapAccessCount                 ///< Number of access modes.  Not itself a valid access mode.
+};
+
+#if defined(__unix__)
+/// Describes possible handle types.  Only declared on __unix__ builds.
+enum class HandleType : uint32
+{
+    GemFlinkName      = 0x0, ///< GEM flink name (needs DRM authentication, used by DRI2)
+    Kms               = 0x1, ///< KMS handle which is used by all driver ioctls
+    DmaBufFd          = 0x2, ///< DMA-buf fd handle
+    KmsNoImport       = 0x3, ///< Deprecated in favour of, and same behaviour as, HandleType::DmaBufFd; use that instead.
+};
+#endif
+
+/// Comparison function determines how a pass/fail condition is determined between two values.  For depth/stencil
+/// comparison, the first value comes from source data and the second value comes from destination data.
+enum class CompareFunc : uint8
+{
+    Never        = 0x0,
+    Less         = 0x1,
+    Equal        = 0x2,
+    LessEqual    = 0x3,
+    Greater      = 0x4,
+    NotEqual     = 0x5,
+    GreaterEqual = 0x6,
+    _Always      = 0x7,
+
+    // Unfortunately for Linux clients, X.h includes a "#define Always 2" macro.  Clients have their choice of either
+    // undefing Always before including this header or using _Always when dealing with PAL.
+    // Always is only an alias of _Always, so Count below is 0x8 whether or not the alias is declared.
+#ifndef Always
+    Always       = _Always,
+#endif
+
+    Count   ///< Number of comparison functions.  Not itself a valid comparison function.
+};
+
+/// Defines an offset into a 2D pixel region.  Components are signed.
+struct Offset2d
+{
+    int32 x;  ///< X offset.
+    int32 y;  ///< Y offset.
+};
+
+/// Defines an offset into a 3D pixel region.  Components are signed.
+struct Offset3d
+{
+    int32 x;  ///< X offset.
+    int32 y;  ///< Y offset.
+    int32 z;  ///< Z offset.
+};
+
+/// Defines a floating-point offset into a 3D pixel region.
+struct Offset3dFloat
+{
+    float x;  ///< X offset.
+    float y;  ///< Y offset.
+    float z;  ///< Z offset.
+};
+
+/// Defines an unsigned width and height for a 2D image region. The dimensions could be pixels, blocks, or bytes
+/// depending on context, so be sure to check documentation for the PAL interface of interest to be sure you
+/// get it right.
+struct Extent2d
+{
+    uint32 width;   ///< Width of region.
+    uint32 height;  ///< Height of region.
+};
+
+/// Defines a signed width and height, for a 2D image region. The dimensions could be pixels, blocks, or bytes
+/// depending on context, so be sure to check documentation for the PAL interface of interest to be sure you
+/// get it right.  Same layout as Extent2d, but the components are signed.
+struct SignedExtent2d
+{
+    int32 width;    ///< Width of region.
+    int32 height;   ///< Height of region.
+};
+
+/// Defines an unsigned width, height, and depth for a 3D image region. The dimensions could be pixels, blocks, or
+/// bytes depending on context, so be sure to check documentation for the PAL interface of interest to be sure you
+/// get it right.
+struct Extent3d
+{
+    uint32 width;   ///< Width of region.
+    uint32 height;  ///< Height of region.
+    uint32 depth;   ///< Depth of region.
+};
+
+/// Two extents are equal only when all three dimensions match.
+constexpr bool operator==(const Extent3d& x, const Extent3d& y)
+{
+    return ((x.width != y.width) || (x.height != y.height) || (x.depth != y.depth)) == false;
+}
+
+/// Two extents differ when any dimension differs; defined as the negation of operator==.
+constexpr bool operator!=(const Extent3d& x, const Extent3d& y)
+{
+    return !(x == y);
+}
+
+/// Defines a signed width, height, and depth for a 3D image region. The dimensions could be pixels, blocks, or bytes
+/// depending on context, so be sure to check documentation for the PAL interface of interest to be sure you
+/// get it right.  Same layout as Extent3d, but the components are signed.
+struct SignedExtent3d
+{
+    int32 width;    ///< Width of region.
+    int32 height;   ///< Height of region.
+    int32 depth;    ///< Depth of region.
+};
+
+/// Defines a floating-point width, height, and depth for a 3D image region. The dimensions could be pixels, blocks, or
+/// bytes depending on context, so be sure to check documentation for the PAL interface of interest to be sure you
+/// get it right.
+struct Extent3dFloat
+{
+    float width;    ///< Width of region.
+    float height;   ///< Height of region.
+    float depth;    ///< Depth of region.
+};
+
+/// Defines a region in 1D space as a signed starting offset plus an unsigned size.
+struct Range
+{
+    int32  offset;  ///< Starting position.
+    uint32 extent;  ///< Region size.
+};
+
+/// Defines a rectangular region in 2D space as a signed corner offset plus an unsigned size.
+struct Rect
+{
+    Offset2d offset;  ///< Top left corner.
+    Extent2d extent;  ///< Rectangle width and height.
+};
+
+/// Defines a cubic region in 3D space as a signed corner offset plus an unsigned size.
+struct Box
+{
+    Offset3d offset;  ///< Top left front corner.
+    Extent3d extent;  ///< Box width, height and depth.
+};
+
+/// ShaderHash represents a 128-bit shader hash, stored as two 64-bit halves.
+struct ShaderHash
+{
+    uint64 lower;   ///< Lower 64-bits of hash
+    uint64 upper;   ///< Upper 64-bits of hash
+};
+
+/// PipelineHash represents a concatenated pair of 64-bit hashes.  Unlike ShaderHash, the two halves carry distinct
+/// meanings (see the member comments).
+struct PipelineHash
+{
+    uint64 stable;   ///< Lower 64-bits of hash.  "Stable" portion, suitable for e.g. shader replacement use cases.
+    uint64 unique;   ///< Upper 64-bits of hash.  "Unique" portion, suitable for e.g. pipeline cache use cases.
+};
+
+/// Common shader pre and post compilation stats.
+struct CommonShaderStats
+{
+    uint32  numUsedVgprs;               ///< Number of VGPRs used by this shader
+    uint32  numUsedSgprs;               ///< Number of SGPRs used by this shader
+
+    uint32  ldsSizePerThreadGroup;      ///< LDS size per thread group in bytes.
+    size_t  ldsUsageSizeInBytes;        ///< LDS usage by this shader, in bytes.
+
+    size_t  scratchMemUsageInBytes;     ///< Amount of scratch mem used by this shader, in bytes.
+    gpusize gpuVirtAddress;             ///< Gpu mem address of shader ISA code.
+
+    // The named bitfields and u32All alias the same 32 bits, so u32All can be used to clear or copy every flag at once.
+    union
+    {
+        struct
+        {
+            uint32 isWave32 :  1;  ///< If set, specifies that the shader is compiled in wave32 mode.
+            uint32 reserved : 31;  ///< Reserved for future use.
+        };
+        uint32 u32All;  ///< Flags packed as a 32-bit uint.
+    } flags;            ///< Shader compilation stat flags.
+};
+
+/// Per-thread stack sizes, split by which part of the compiler manages them.
+/// NOTE(review): the unit (bytes vs. dwords) is not stated in this header - confirm against the producer.
+struct CompilerStackSizes
+{
+    uint32 backendSize;  ///< Managed by compiler backend
+    uint32 frontendSize; ///< Managed by compiler frontend
+};
+
+///@{
+/// Determines whether two ShaderHashes or PipelineHashes are equal.
+///
+/// @param  [in]    hash1    The first 128-bit shader hash or pipeline hash
+/// @param  [in]    hash2    The second 128-bit shader hash or pipeline hash
+///
+/// @returns True if the hashes are equal.
+constexpr bool ShaderHashesEqual(const ShaderHash hash1, const ShaderHash hash2)
+{
+    // Equal only when both 64-bit halves match.
+    return ((hash1.lower == hash2.lower) && (hash1.upper == hash2.upper));
+}
+constexpr bool operator==(const ShaderHash hash1, const ShaderHash hash2)
+{
+    return ShaderHashesEqual(hash1, hash2);
+}
+constexpr bool operator!=(const ShaderHash hash1, const ShaderHash hash2)
+{
+    return (ShaderHashesEqual(hash1, hash2) == false);
+}
+constexpr bool PipelineHashesEqual(const PipelineHash hash1, const PipelineHash hash2)
+{
+    // Equal only when both the stable and the unique portions match.
+    return ((hash1.stable == hash2.stable) && (hash1.unique == hash2.unique));
+}
+///@}
+
+///@{
+/// Determines whether the given ShaderHash or PipelineHash is non-zero.
+///
+/// @param  [in]    hash    A 128-bit shader hash or pipeline hash
+///
+/// @returns True if the hash is non-zero.
+constexpr bool ShaderHashIsNonzero(const ShaderHash hash)
+{
+    // Non-zero as soon as either 64-bit half has any bit set.
+    return ((hash.upper != 0) || (hash.lower != 0));
+}
+constexpr bool PipelineHashIsNonzero(const PipelineHash hash)
+{
+    // Non-zero as soon as either the stable or the unique portion has any bit set.
+    return ((hash.stable != 0) || (hash.unique != 0));
+}
+///@}
+
+/// Specifies the Display Output Post-Processing (DOPP) desktop texture information, which is provided by OpenGL via
+/// interop.  DOPP is an OpenGL extension that allows its client to access the desktop texture directly without the
+/// need of copying to system memory.  This is only supported on Windows.
+struct DoppDesktopInfo
+{
+    gpusize gpuVirtAddr;    ///< The VA of the dopp desktop texture. Set to 0 for the non-dopp resource.
+    uint32  vidPnSourceId;  ///< Display source id of the dopp desktop texture.
+};
+
+/// Specifies the Direct Capture resource information. Direct Capture is an extension that allows access to the
+/// on-screen primary, motion vectors, depth, and camera matrix directly. This is only supported on Windows.
+struct DirectCaptureInfo
+{
+    uint32  vidPnSourceId;  ///< VidPnSource ID of the on-screen primary.
+    union
+    {
+        struct
+        {
+            uint32 preflip              :  1;  ///< Requires pre-flip primary access
+            uint32 postflip             :  1;  ///< Requires post-flip primary access. A DirectCapture resource cannot
+                                               ///  have pre-flip and post-flip access at the same time
+            uint32 accessDesktop        :  1;  ///< Requires access to the desktop
+            uint32 shared               :  1;  ///< This resource will be shared between APIs
+            uint32 frameGenRatio        :  4;  ///< Frame generation ratio
+            uint32 paceGeneratedFrame   :  1;  ///< Requires pacing the generated frames
+            uint32 requiresDisplayDcc   :  1;  ///< Requires display dcc support
+            uint32 requestMotionVectors :  1;  ///< Request DirectCapture access to motion vector data if available
+            uint32 requestDepth         :  1;  ///< Request DirectCapture access to depth data if available
+            uint32 requestCamera        :  1;  ///< Request DirectCapture access to camera matrix data if available
+            uint32 initMotionVectors    :  1;  ///< Initialize the DirectCapture resource to access motion vector data
+            uint32 initDepth            :  1;  ///< Initialize the DirectCapture resource to access depth data
+            uint32 initCamera           :  1;  ///< Initialize the DirectCapture resource to access camera matrix
+            uint32 requestHudLessImage  :  1;  ///< Request DirectCapture access to HUD less image if available
+            uint32 initHudLessImage     :  1;  ///< Initialize the DirectCapture resource to access HUD less image
+            uint32 reserved             : 14;  ///< Reserved bits; pads the flags out to 32 bits.
+        };
+        uint32 u32All;                         ///< Flags packed as a 32-bit uint.
+    } usageFlags;                              ///< Direct Capture usage flags.
+
+    OsExternalHandle hNewFrameEvent;   ///< Event to notify of a new frame available for pre-flip or post-flip access
+    OsExternalHandle hFatalErrorEvent; ///< Event to notify of a fatal error
+};
+
+/// Specifies parameters for opening a shared GPU resource from a non-PAL device or non-local process.
+struct ExternalResourceOpenInfo
+{
+    OsExternalHandle hExternalResource;         ///< External GPU resource from another non-PAL device to open.
+#if defined(__unix__)
+    HandleType       handleType;                ///< Type of the external GPU resource to be opened.
+#endif
+
+    union
+    {
+        struct
+        {
+            uint32 ntHandle           :  1; ///< The provided hExternalResource is an NT handle instead of a default
+                                            ///  KMT handle.
+            uint32 androidHwBufHandle :  1; ///< The provided hExternalResource is android hardware buffer handle
+                                            ///  instead of fd.
+            uint32 isDopp             :  1; ///< This is a Dopp texture, doppDesktopInfo is in use.
+            uint32 isDirectCapture    :  1; ///< This is a Direct Capture resource, directCaptureInfo is in use.
+            uint32 globalGpuVa        :  1; ///< The GPU virtual address must be visible to all devices.
+            uint32 reserved           : 27; ///< Reserved for future use.
+        };
+        uint32 u32All;            ///< Flags packed as 32-bit uint.
+    } flags;                      ///< External resource open flags.
+
+    // The two members below share storage; flags.isDopp / flags.isDirectCapture indicate which one is in use.
+    union
+    {
+        DoppDesktopInfo   doppDesktopInfo;      ///< The information of dopp desktop texture.
+        DirectCaptureInfo directCaptureInfo;    ///< The information of direct capture resource.
+    };
+};
+
+/// Packed pixel display enumeration.
+///
+/// In the medical imaging market space, there are several 10-bit per component color and grayscale displays
+/// available.  In addition to being high precision, these displays tend to be very high resolution.  For grayscale
+/// displays, one method of getting high pixel resolution in 10b precision is a proprietary method called
+/// "packed pixel".  Each of these packed pixel formats packs two/three 10-bit luminance values into a single
+/// R8G8B8 pixel.
+///
+/// Example Displays:
+///
+///     EIZO GS510
+///     NEC MD21GS
+///     TOTOKU ME55Xi2
+///     FIMI 3/5MP
+///
+///
+///   The enumerations are named in a way to describe the format of the packed pixels. Names for
+///   formats with two or three pixels packed into a single word (corresponding to a simple RGB pixel)
+///   follow this convention:
+///
+///       LLLLLL_RRRRRR (L=left pixel, R=right pixel) or
+///       LLL_MMM_RRR (L=left pixel, M=middle pixel, R=right pixel)
+///
+///   The bit order for a pixel follows this convention:
+///
+///       (ColorBand)MSB(ColorBand)LSB
+///
+///   For example: G70B54 means that the MSBs are in bits 7-0 of the green channel, and the LSBs
+///   are stored in bits 5-4 of the blue channel.
+///
+enum class PackedPixelType : uint32
+{
+    NotPacked = 0,          ///< Pixels not packed, for standard color RGB8 monitor
+    SplitG70B54_R70B10,     ///< 10-bit mono, split screen
+    SplitB70G10_R70G76,     ///< 10-bit mono, split screen
+    G70B54_R70B10,          ///< 10-bit mono, 2 adjacent pixels
+    B70R32_G70R76,          ///< 10-bit mono, 2 adjacent pixels
+    B70R30_G70R74,          ///< 12-bit mono, 2 adjacent pixels
+    B70_G70_R70,            ///< 8-bit mono, 3 adjacent pixels
+    R70G76,                 ///< 10-bit mono, single pixel
+    G70B54,                 ///< 10-bit mono, single pixel
+    Native,                 ///< 10-bit color, without packing
+};
+
+/// Enumerates the logging priority levels supported by PAL.
+enum class LogLevel : uint32
+{
+    Debug = 0, ///< Debug messages
+    Verbose,   ///< High frequency messages
+    Info,      ///< Low frequency messages
+    Alert,     ///< Warnings
+    Error,     ///< Critical issues
+    Always     ///< All messages
+};
+
+/// Enumerates all log categories explicitly defined by PAL.  The enumerator order must match the order of the strings
+/// in LogCategoryTable, and each value is used as a bit position for the LogCategoryMask* constants.
+enum class LogCategory : uint64
+{
+    Correctness = 0, ///< Application correctness
+    Performance,     ///< Application performance
+    Internal,        ///< Internal logging
+    Display,         ///< Display Info
+    Count            ///< Number of categories.  Not itself a valid category.
+};
+
+/// String table used to register log categories.  Entry i is the name of the LogCategory whose value is i.
+constexpr const char* LogCategoryTable[] =
+{
+    "Correctness",
+    "Performance",
+    "Internal",
+    "Display"
+};
+
+// Catch at compile time any LogCategory that is added without a matching name (or vice versa).
+static_assert((sizeof(LogCategoryTable) / sizeof(LogCategoryTable[0])) == static_cast<size_t>(LogCategory::Count),
+              "LogCategoryTable must contain exactly one string per LogCategory enumerator.");
+
+/// Typedef for log category masks.  Bit N corresponds to the LogCategory whose value is N.
+typedef uint64 LogCategoryMask;
+
+// The shifts below are performed on a 64-bit one (1ull) rather than a plain int so that the full width of
+// LogCategoryMask is usable; shifting an int would overflow for bit positions of 31 and above.
+
+/// Log category mask for messages related to application correctness
+constexpr LogCategoryMask LogCategoryMaskCorrectness = (1ull << static_cast<uint32>(LogCategory::Correctness));
+
+/// Log category mask for messages related to application performance
+constexpr LogCategoryMask LogCategoryMaskPerformance = (1ull << static_cast<uint32>(LogCategory::Performance));
+
+/// Log category mask for messages related to internal messages
+constexpr LogCategoryMask LogCategoryMaskInternal    = (1ull << static_cast<uint32>(LogCategory::Internal));
+
+/// Log category mask for messages related to display information (e.g. HDR format)
+constexpr LogCategoryMask LogCategoryMaskDisplay     = (1ull << static_cast<uint32>(LogCategory::Display));
+
+/// Defines the modes that the GPU Profiling layer can be enabled with. If the GpuProfilerMode is
+/// GpuProfilerTraceEnabledTtv or GpuProfilerTraceEnabledRgp, then the GpuProfilerConfig_TraceModeMask is examined to
+/// configure the trace type (spm, sqtt or both) requested.
+enum GpuProfilerMode : uint32
+{
+    GpuProfilerDisabled              = 0, ///< Gpu Profiler is disabled.
+    GpuProfilerCounterAndTimingOnly  = 1, ///< Traces are disabled but perf counter and timing operations are enabled.
+    GpuProfilerTraceEnabledTtv       = 2, ///< Traces are output in format (.csv, .out) for Thread trace viewer.
+    GpuProfilerTraceEnabledRgp       = 3, ///< Trace data is output as a .rgp file for Radeon Gpu Profiler.
+};
+
+// Key code type used for the keys that trigger a GPU profiler capture.
+using GpuProfilerCaptureTriggerKey = Util::KeyCode;
+
+#define PAL_EVENT_LOGGING_VERSION 528
+
+/// This enumeration identifies the source/owner of a resource object, used for event logging.
+/// The numeric values are spelled out explicitly.
+/// NOTE(review): presumably they are recorded in event logs and must stay stable - confirm before renumbering.
+enum ResourceOwner : uint32
+{
+    ResourceOwnerApplication = 0,    ///< The resource is owned by the application
+    ResourceOwnerPalClient   = 1,    ///< The resource is owned by the PAL client
+    ResourceOwnerPal         = 2,    ///< The resource is owned by PAL
+    ResourceOwnerUnknown     = 3,    ///< The resource owner is unknown
+};
+
+/// This enumeration lists the usage/category of a resource object to give context in event logging.
+/// The numeric values are spelled out explicitly.
+/// NOTE(review): presumably they are recorded in event logs and must stay stable - confirm before renumbering.
+enum ResourceCategory : uint32
+{
+    ResourceCategoryApplication = 0,    ///< The resource is used by the application.
+    ResourceCategoryRpm         = 1,    ///< The resource is used by RPM
+    ResourceCategoryProfiling   = 2,    ///< The resource is used for profiling (e.g. SQTT, SPM, etc)
+    ResourceCategoryDebug       = 3,    ///< The resource is used for debug purposes
+    ResourceCategoryRayTracing  = 4,    ///< The resource is used for ray tracing
+    ResourceCategoryVideo       = 5,    ///< The resource is used for video encode/decode
+    ResourceCategoryMisc        = 6,    ///< Miscellaneous, resource doesn't fit in any of the above categories
+    ResourceCategoryUnknown     = 7,    ///< The resource category is unknown
+};
+
+/// Set of information about resource ownership and usage, used for event logging.  Pairs a ResourceOwner with a
+/// ResourceCategory.
+struct ResourceEventInfo
+{
+    ResourceOwner    owner;     ///< Resource owner
+    ResourceCategory category;  ///< Resource category
+};
+
+/// General purpose on/off/default tri-state enum.  Note that Default (not Disable) is the zero value.
+enum class TriState : uint8
+{
+    Default = 0,  ///< Let implementation decide whether to enable or disable
+    Enable  = 1,  ///< Force enable
+    Disable = 2,  ///< Force disable
+    Count         ///< Number of tri-state values.  Not itself a valid state.
+};
+
+
+/**
+ ***********************************************************************************************************************
+ * @mainpage
+ *
+ * Introduction
+ * ------------
+ * The Platform Abstraction Library (PAL) provides hardware and OS abstractions for Radeon (GCN+) user-mode 3D graphics
+ * drivers.  The level of abstraction is chosen to support performant driver implementations of several APIs while
+ * hiding the client from hardware and operating system details.
+ *
+ * PAL client drivers will have no HW-specific code; their responsibility is to translate API/DDI commands into PAL
+ * commands as efficiently as possible.  This means that the client should be unaware of hardware registers, PM4
+ * commands, SP3 shaders, etc.  However, PAL is an abstraction of AMD hardware only, so many things in the PAL interface
+ * have an obvious correlation to hardware features.
+ *
+ * PAL client drivers should have little OS-specific code.  PAL and its companion utility collection provide
+ * OS abstractions for almost everything a client might need, but there are some cases where this is unavoidable:
+ *
+ * + Handling dynamic library infrastructure.  I.e., the client has to implement DllMain() on Windows, etc.
+ * + OS-specific APIs or extensions.  DX may have Windows-specific functionality in the core API, and Vulkan may
+ *   export certain OS-specific features as extensions (like for presenting contents to the screen).
+ * + Single OS clients (e.g., DX) may choose to make OS-specific calls directly simply out of convenience with no down
+ *   side.
+ *
+ *
+ * The following diagram illustrates the software stack when running a 3D application with a PAL-based UMD.  Non-AMD
+ * components are in gray, UMD client code is blue, AMD static libs linked into the UMD are green, and the AMD KMD
+ * is in red.
+ *
+ * @image html swStack.png
+ *
+ * PAL is a relatively _thick_ abstraction layer, typically accounting for the majority of code (excluding SC) in any
+ * particular UMD built on PAL.  The level of abstraction tends to be higher in areas where client APIs are similar,
+ * and lower (closer to hardware) in areas where client APIs diverge significantly.  The overall philosophy is to share
+ * as much code as possible without impacting client driver performance.  Our committed goal is that CPU-limited
+ * performance should be within 5% of what a native solution could achieve, and GPU-limited performance should be within
+ * 2%.
+ *
+ * PAL uses a C++ interface.  The public interface is defined in .../pal/inc, and clients must _only_ include headers
+ * from that directory.  The interface is spread over many header files - typically one per class - in order to clarify
+ * dependencies and reduce build times.  There are three sub-directories in .../pal/inc:
+ *
+ * + <b>.../pal/inc/core</b>    - Defines the PAL Core (see @ref Overview).
+ * + <b>.../pal/inc/gpuUtil</b> - Defines the PAL GPU Utility Collection (see @ref GpuUtilOverview).
+ * + <b>.../pal/inc/util</b>    - Defines the PAL Utility Collection (see @ref UtilOverview).
+ *
+ *
+ * @copydoc VersionHistory
+ *
+ * Next: @ref Build
+ ***********************************************************************************************************************
+ */
+
+/**
+ ***********************************************************************************************************************
+ * @page Overview PAL Core Overview
+ *
+ * ### Introduction
+ * PAL's core interface is defined in the @ref Pal namespace, and defines an object-oriented model for interacting with
+ * the GPU and OS.  The interface closely resembles the Vulkan and DX12 APIs.  Some common features of these
+ * APIs that are central to the PAL interface:
+ *
+ * - All shader stages, and some additional "shader adjacent" state, are glommed together into a monolithic pipeline
+ *   object.
+ * - Explicit, free-threaded command buffer generation.
+ * - Support for multiple, asynchronous engines for executing GPU work (graphics, compute, DMA).
+ * - Explicit system and GPU memory management.
+ * - Flexible shader resource binding model.
+ * - Explicit management of stalls, cache flushes, and compression state changes.
+ *
+ * However, as a common component supporting multiple APIs, the PAL interface tends to be lower level in places where
+ * client APIs diverge.
+ *
+ * ### Settings
+ * The PAL library has a number of configuration settings available for the client to modify either programmatically
+ * or via external settings.  PAL also includes infrastructure for building/loading client-specific settings.
+ * See @ref Settings for a detailed description of this support.
+ *
+ * ### Initialization
+ * The first step to interacting with the PAL core is creating an IPlatform object and enumerating IDevice objects
+ * representing GPUs attached to the system and, optionally, IScreen objects representing displays attached to the
+ * system.  See @ref LibInit for a detailed description.
+ *
+ * ### System Memory Allocation
+ * Clients have a lot of control over PAL's system memory allocations.  Most PAL objects require the client to provide
+ * system memory; the client first calls a GetSize() method and then passes a pointer to PAL on the actual create call.
+ * Further, when PAL needs to make an internal allocation, it will optionally call a client callback, which can be
+ * specified on platform creation.  This callback will specify a category for the allocation, which may imply an
+ * expected lifetime.
+ *
+ * ### Interface Classes
+ * The following diagram illustrates the relationship of some key PAL interfaces and how they interact to render a
+ * typical frame in a modern game.  Below that is a listing of all of PAL's interface classes, and a very brief
+ * description of their purpose.  Follow the link for each interface to see detailed reference documentation.
+ *
+ * @image html scheduling.png
+ *
+ * - __OS Abstractions__
+ *   + _IPlatform_: Root-level object created by clients that interact with PAL.  Mostly responsible for enumerating
+ *                  devices and screens attached to the system and returning any system-wide properties.<br><br>
+ *   + _IDevice_: Configurable context for querying properties of a particular GPU and interacting with it.  Acts as a
+ *                factory for almost all other PAL objects.<br><br>
+ *   + _IQueue_: A device has one or more _engines_ which are able to issue certain types of work.  Tahiti, for example,
+ *               has 1 universal engine (supports graphics, compute, or copy commands), 2 compute engines (support
+ *               compute or copy commands), and 2 DMA engines (support only copy commands).  An IQueue object is a
+ *               context for submitting work on a particular engine.  This mainly takes the form of submitting command
+ *               buffers and presenting images to the screen.  Work performed in a queue will be started in order, but
+ *               work executed on different queues (even if the queues reference the same engine) is not guaranteed
+ *               to be ordered without explicit synchronization.<br><br>
+ *   + _IQueueSemaphore_: Queue semaphores can be signaled and waited on from an IQueue in order to control execution
+ *                        order between queues.<br><br>
+ *   + _IFence_: Used for coarse-grain CPU/GPU synchronization.  Fences can be signaled from the GPU as part of a
+ *               command buffer submission on a queue, then waited on from the CPU.<br><br>
+ *   + _IGpuMemory_: Represents a GPU-accessible memory allocation.  Can either be virtual (only VA allocation which
+ *                   must be explicitly mapped via an IQueue operation) or physical.  Residency of physical allocations
+ *                   must be managed by the client either globally for a device (IDevice::AddGpuMemoryReferences) or by
+ *                   specifying allocations referenced by command buffers at submit.<br><br>
+ *   + _ICmdAllocator_: GPU memory allocation pool used for backing an ICmdBuffer.  The client is free to create one
+ *                      allocator per device, or one per thread to remove thread contention.<br><br>
+ *   + _IScreen_: Represents a display attached to the system.  Mostly used for managing full-screen flip
+ *                presents.<br><br>
+ *   + _IPrivateScreen_: Represents a display that is not otherwise visible to the OS, typically a VR head mounted
+ *                       display.<br><br>
+ * - __Hardware IP Abstractions__
+ *    + __All IP__
+ *      - _ICmdBuffer_: Clients build command buffers to execute the desired work on the GPU, and submit them on a
+ *                      corresponding queue.  Different types of work can be executed depending on the _queueType_ of
+ *                      the command buffer (graphics work, compute work, DMA work).<br><br>
+ *      - _IImage_: Images are a 1D, 2D, or 3D collection of pixels (i.e., _texture_) that can be accessed by the
+ *                  GPU in various ways: texture sampling, BLT source/destination, UAV, etc.<br><br>
+ *    + __GFXIP-only__
+ *      - _IShader_: Container for shader byte code used as an input to pipeline creation.  No compilation occurs
+ *                   until an IPipeline is created.  Currently, AMDIL is the only supported input language.<br><br>
+ *      - _IPipeline_: Comprised of all shader stages (CS for compute, VS/HS/DS/GS/PS for graphics), resource mappings
+ *                     describing how user data entries are to be used by the shaders, and some other fixed-function
+ *                     state like depth/color formats, blend enable, MSAA enable, etc.<br><br>
+ *      - _IColorTargetView_: IImage view allowing the image to be bound as a color target (i.e., RTV).<br><br>
+ *      - _IDepthStencilView_: IImage view allowing the image to be bound as a depth/stencil target (i.e., DSV).<br><br>
+ *      - _IGpuEvent_: Used for fine-grained (intra-command buffer) synchronization between the CPU and GPU.  GPU
+ *                     events can be set/reset from either the CPU or GPU and waited on from either.<br><br>
+ *      - _IQueryPool_: Collection of query slots for tracking occlusion or pipeline stats query results.<br><br>
+ *      - __Dynamic State Objects__: _IColorBlendState_, _IDepthStencilState_, _IMsaaState_, _IScissorState_,
+ *                                   and _IViewportState_ define logical collections of related fixed function graphics
+ *                                   state, similar to DX11.<br><br>
+ *      - _IPerfExperiment_: Used for gathering performance counter and thread trace data.<br><br>
+ *      - _IBorderColorPalette_: Provides a collection of indexable colors for use by samplers that clamp to an
+ *                               arbitrary border color.<br><br>
+ * - __Common Base Classes__
+ *   + _IDestroyable_: Defines a _Destroy()_ method for the PAL interface.  Calling _Destroy()_ will release any
+ *                     internally allocated resources for the object, but the client is still responsible for freeing
+ *                     the system memory provided for the object.<br><br>
+ *   + _IGpuMemoryBindable_: Defines a set of methods for binding GPU memory to the object.  Interfaces that inherit
+ *                           _IGpuMemoryBindable_ require GPU memory in order to be used by the GPU.  The client
+ *                           must query the requirements (e.g., alignment, size, heaps) and allocate/bind GPU memory
+ *                           for the object.  _IGpuMemoryBindable_ inherits from _IDestroyable_.<br><br>
+ *
+ * ### %Format Info
+ * Several helper methods are available for dealing with image formats in the @ref Formats namespace.
+ *
+ * ### Graphics/Compute Execution Model
+ * Most graphics/compute work is defined by first binding a set of states then issuing a draw or dispatch command to
+ * kick off the work.  The complete set of graphics states available in PAL is illustrated below; compute is a subset
+ * of this that only includes the pipeline, user data entries, and border color palette.
+ *
+ * @image html stateBreakdown.jpg
+ *
+ * Most of these correspond directly to a PAL interface object above, and these items are bound by calling a
+ * corresponding _CmdBind...()_ method in the ICmdBuffer interface.  The states marked in yellow and orange, however,
+ * are _immediate_ states for which there is no object; you just specify the required state values in the corresponding
+ * _CmdSet...()_ method in the ICmdBuffer interface.
+ *
+ * User data entries are the way that input resources are specified for the pipeline on an upcoming draw/dispatch.  This
+ * mapping is complicated, and is described fully in @ref ResourceBinding.
+ *
+ * A final complication worth noting is that PAL provides no implicit surface synchronization.  The client is
+ * responsible for explicitly inserting barriers to resolve data hazards, flush/invalidate caches, and ensure images
+ * are in the proper compression state.  For more detail, see ICmdBuffer::CmdReleaseThenAcquire, CmdRelease, CmdAcquire,
+ * CmdReleaseEvent, CmdAcquireEvent and AcquireReleaseInfo.
+ *
+ ***********************************************************************************************************************
+ */
+
+} // Pal
@@ -0,0 +1,204 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palCmdAllocator.h
+ * @brief Defines the Platform Abstraction Library (PAL) ICmdAllocator interface and related types.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palDestroyable.h"
+
+namespace Pal
+{
+
+// Forward declarations.
+struct GpuMemSubAllocInfo;
+class  IGpuMemory;
+
+/// Flags controlling the creation of ICmdAllocator objects.
+///
+/// The individual bit-fields (four 1-bit flags plus 28 reserved bits) overlay @ref u32All, which exposes the same
+/// flags packed as a single 32-bit value.
+union CmdAllocatorCreateFlags
+{
+    struct
+    {
+        uint32 threadSafe               :  1; ///< If set, the allocator will acquire a lock each time it is accessed;
+                                              ///  otherwise it will not attempt to protect itself from multithreaded
+                                              ///  access.
+        uint32 autoMemoryReuse          :  1; ///< If set, the allocator will track when the GPU finishes accessing
+                                              ///  each piece of command memory and attempt to reuse memory which the
+                                              ///  GPU is done with before allocating more memory from the OS.  If not
+                                              ///  set, memory will only be recycled after a call to
+                                              ///  @ref ICmdAllocator::Reset().
+        uint32 disableBusyChunkTracking :  1; ///< If set, the allocator will not do any GPU-side tracking of which
+                                              ///  command chunks are still in use.  It will be the client's (or the
+                                              ///  application's) responsibility to guarantee that command chunks are
+                                              ///  not returned to the allocator before the GPU has finished processing
+                                              ///  them.  Failure to guarantee this will result in undefined behavior.
+                                              ///  This flag has no effect if @ref autoMemoryReuse is not set.
+        uint32 autoTrimMemory           :  1; ///< If set, the allocator will automatically trim down the allocations
+                                              ///  (where all chunks are idle on the freeList).  A minimum of
+                                              ///  allocFreeThreshold allocations are kept for fast reuse.
+        uint32 reserved                 : 28; ///< Reserved for future use.
+    };
+
+    uint32     u32All;          ///< Flags packed as 32-bit uint.
+};
+
+/// Different types of allocation data that an ICmdAllocator allocates and distributes to command buffers.
+///
+/// The enumerators are numbered consecutively from zero, so CmdAllocatorTypeCount equals the number of allocation
+/// types and is used to size per-type arrays (see CmdAllocatorCreateInfo::allocInfo).
+enum CmdAllocType : uint32
+{
+    CommandDataAlloc  = 0,  ///< Data allocated is for executable commands.
+    EmbeddedDataAlloc,      ///< Data allocated is for embedded data.
+    LargeEmbeddedDataAlloc, ///< Data allocated is for embedded data where the allocation is >32kb.
+    GpuScratchMemAlloc,     ///< Data allocated is GPU-only accessible at command buffer execution-time.  Possible
+                            ///  uses include GPU events.
+    CmdAllocatorTypeCount   ///< Number of allocation types for ICmdAllocators.  Must remain the last enumerator.
+};
+
+/// Specifies properties for creation of an ICmdAllocator object.  Input structure to IDevice::CreateCmdAllocator().
+///
+/// The allocInfo array holds one entry per @ref CmdAllocType and is indexed by that enum.
+struct CmdAllocatorCreateInfo
+{
+    CmdAllocatorCreateFlags flags;        ///< Flags controlling command allocator creation.
+
+    struct
+    {
+        GpuHeap             allocHeap;    ///< Preferred allocation heap.  For @ref GpuScratchMemAlloc, this field is
+                                          ///  ignored and the allocation will always be in GPU-invisible memory.  For
+                                          ///  all other allocation types, this must be CPU-mappable.
+                                          ///  For best performance, command allocators that will be used by the
+                                          ///  UVD engine should prefer the Local heap.
+        gpusize             allocSize;    ///< Size, in bytes, of the GPU memory allocations this allocator will create.
+                                          ///  It must be an integer multiple of suballocSize.
+        gpusize             suballocSize; ///< Size, in bytes, of the chunks of GPU memory this allocator will give to
+                                          ///  command buffers.  It must be an integer multiple of 4096.
+                                          ///  Must be greater than zero even if the client doesn't plan on using this
+                                          ///  allocation type.
+        uint32      allocFreeThreshold;   ///< Minimum count of free allocations that the allocator should keep around
+                                          ///  for fast reuse.  It is used when the autoTrimMemory flag is set.
+    } allocInfo[CmdAllocatorTypeCount];   ///< Information for each allocation type.
+};
+
+/// Output structure for ICmdAllocator::QueryUtilizationInfo().
+/// The CmdAllocator utilization data can be queried by PAL clients in order to decide whether to trim the allocations.
+/// The counts describe a single @ref CmdAllocType, the one passed to the query.
+struct CmdAllocatorUtilizationInfo
+{
+    uint32  numAllocations;     ///< Number of allocations owned by the allocator.
+    uint32  numFreeChunks;      ///< Number of chunks that are reset and not in use.
+    uint32  numBusyChunks;      ///< Number of chunks that are in use by the GPU.
+    uint32  numReuseChunks;     ///< Number of chunks that have been 'returned' to the allocator for reuse.
+};
+
+/**
+ ***********************************************************************************************************************
+ * @interface ICmdAllocator
+ * @brief     Allocates and distributes GPU memory to command buffers on the client's behalf.
+ *
+ * Every ICmdBuffer object must be associated with an ICmdAllocator when it is created, and may switch to a different
+ * command allocator when ICmdBuffer::Reset() is called.  While commands are being built, the command buffers
+ * associated with a given command allocator query that allocator for additional GPU memory.
+ *
+ * To protect against race conditions the client must ask for a thread-safe command allocator unless it can guarantee
+ * that all command buffers associated with a given command allocator will be built, reset, and destroyed in a
+ * thread-safe manner.  It is illegal to destroy a command allocator while it still has command buffers associated
+ * with it.
+ *
+ * @see IDevice::CreateCmdAllocator()
+ ***********************************************************************************************************************
+ */
+class ICmdAllocator : public IDestroyable
+{
+public:
+    /// Explicitly resets a command allocator, marking all internal GPU memory allocations as unused.
+    ///
+    /// Before calling this function the client must guarantee that every command buffer associated with this
+    /// allocator has finished GPU execution and has been explicitly reset.
+    ///
+    /// @param [in] freeMemory Whether all GPU and CPU memory allocations should be returned to the OS.
+    ///
+    /// @returns Success if the command allocator was successfully reset.  Otherwise, one of the following errors may be
+    ///          returned:
+    ///          + ErrorUnknown if an internal PAL error occurs.
+    virtual Result Reset(bool freeMemory) = 0;
+
+    /// Explicitly trims a command allocator, deleting as many unused internal GPU memory allocations as possible.
+    ///
+    /// @param [in] allocTypeMask    Bit mask selecting which CmdAllocType values trimming is applied to.
+    ///                              Use (1 << CmdAllocatorTypeCount) - 1 to apply trimming to all types.
+    ///                              To trim only the embedded data use (1 << EmbeddedDataAlloc).
+    /// @param [in] dynamicThreshold Minimum count of free allocations that the allocator should keep around.
+    ///
+    /// @returns Success if the command allocator was successfully trimmed.
+    virtual Result Trim(uint32 allocTypeMask, uint32 dynamicThreshold) = 0;
+
+    /// Queries the numbers of allocations and chunks for the given CmdAllocType.
+    /// This may help clients decide whether or not to apply trimming.
+    ///
+    /// @param [in]  type             CmdAllocType that is being queried.
+    /// @param [out] pUtilizationInfo The allocation and chunk counts will be stored here.
+    ///
+    /// @returns Success if valid values can be reported.
+    virtual Result QueryUtilizationInfo(CmdAllocType type, CmdAllocatorUtilizationInfo* pUtilizationInfo) const = 0;
+
+    /// Returns the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @returns Pointer to client data.
+    void* GetClientData() const { return m_pClientData; }
+
+    /// Sets the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @param  [in]    pClientData     A pointer to arbitrary client data.
+    void SetClientData(void* pClientData) { m_pClientData = pClientData; }
+
+protected:
+    /// @internal Constructor.  Prevent use of new operator on this interface.  Client must create objects by
+    /// explicitly calling the proper create method.  The client data pointer starts out null.
+    ICmdAllocator()
+        :
+        m_pClientData{nullptr}
+    {
+    }
+
+    /// @internal Destructor.  Prevent use of delete operator on this interface.  Client must destroy objects by
+    /// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
+    /// object on their own.
+    virtual ~ICmdAllocator() = default;
+
+private:
+    /// @internal Client data pointer.  This can have an arbitrary value and can be returned by calling
+    /// GetClientData() and set via SetClientData().
+    /// For non-top-layer objects, this will point to the layer above the current object.
+    void* m_pClientData;
+};
+
+} // Pal
@@ -0,0 +1,370 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palCmdTracking.h
+ * @brief Defines a number of support classes used for construction and storage of struct TrackedCmdLocation
+ *      defined in trackedCmdLocation.h
+ *
+ *      - struct TrackingEventInfo          A single mapping from a uint8 event ID to a name, used for logging
+ *      - class TrackedCmdSupportBase       A set of TrackingEventInfo, maintained outside of Pal
+ *      - class TrackedCmdLocationArray     The arrays of TrackedCmdLocation's used for reporting
+ *                                          correlation data through ICmdBufferReporting::CorrelationReportOnSubmit
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palVector.h"
+
+#include "trackedCmdLocation.h"
+
+namespace Pal
+{
+
+// forward decl
+class Platform;
+
+namespace CmdDisassembly
+{
+
+// forward definition
+class TrackedCmdLocationArray;
+
+/**
+************************************************************************************************************************
+* @brief    class TrackedCmdLocationRef
+*           A copyable reference to a member of a TrackedCmdLocationArray which is invariant to that array being
+*           re-allocated.
+*
+* @details  Is simply a pointer to a TrackedCmdLocationArray, plus an index into that array.
+*
+************************************************************************************************************************
+*/
+class TrackedCmdLocationRef
+{
+public:
+    /// Creates a reference with a null source array and an index of zero.
+    TrackedCmdLocationRef()
+        :
+        m_pSourceArray{nullptr},
+        m_index{0}
+    {
+    }
+
+    /// Creates a reference to the element at @p index of @p pSourceArray.
+    ///
+    /// @param [in] pSourceArray    The array holding the referenced TrackedCmdLocation
+    /// @param [in] index           Index of the referenced TrackedCmdLocation within pSourceArray
+    TrackedCmdLocationRef(
+        TrackedCmdLocationArray*    pSourceArray,
+        Util::uint32                index)
+        :
+        m_pSourceArray{pSourceArray},
+        m_index{index}
+    {
+    }
+
+    // Copying and moving just duplicate the (array pointer, index) pair.
+    TrackedCmdLocationRef(TrackedCmdLocationRef&& other) = default;
+    TrackedCmdLocationRef(TrackedCmdLocationRef const& other) = default;
+    TrackedCmdLocationRef& operator=(TrackedCmdLocationRef&& other) = default;
+    TrackedCmdLocationRef& operator=(TrackedCmdLocationRef const& other) = default;
+
+    /// Two references are equal when they name the same array and the same index within it.
+    bool operator==(TrackedCmdLocationRef const& other) const
+    {
+        return (m_pSourceArray == other.m_pSourceArray) && (m_index == other.m_index);
+    }
+
+    /// Logical negation of operator==.
+    bool operator!=(TrackedCmdLocationRef const& other) const
+    {
+        return !(*this == other);
+    }
+
+    /// Non-const access to a TrackedCmdLocation; defined out of line.
+    /// NOTE(review): presumably returns the element this reference designates - confirm against the implementation.
+    TrackedCmdLocation* Use();
+
+    /// Const access to a TrackedCmdLocation; defined out of line.
+    /// NOTE(review): presumably returns the element this reference designates - confirm against the implementation.
+    const TrackedCmdLocation* Get() const;
+
+    /// @returns The index into the source array held by this reference.
+    Util::uint32 GetIndex() const { return m_index; }
+
+    /// Helper functions
+    ///
+
+    /// Clears the TrackedCmdLocation referred to by this TrackedCmdLocationRef
+    ///
+    /// @returns
+    ///     Result::ErrorInvalidPointer if (IsValid() == false)
+    ///     Result::Success if successful
+    Result Clear();
+
+    /// @returns
+    ///     TrackedCmdLocationMode::Invalid if (IsValid() == false)
+    ///     Get()->m_mode otherwise
+    TrackedCmdLocationMode GetMode() const;
+
+    /// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
+    ///     to mode TrackedCmdLocationMode::Before
+    ///
+    /// @param [in] eventId     Refers to a uint8 event that has a begin and/or an end associated with it
+    ///                         Most likely, a value registered to a TrackedCmdSupportBase
+    /// @param [in] beforePtr   The end pointer for the cmdList being tracked before the event referred to by eventId
+    ///                         Only 48 bits of beforePtr are used
+    ///
+    /// @returns
+    ///     Result::ErrorInvalidPointer if (IsValid() == false)
+    ///     Result::Success if successful
+    Result SetAsBefore(
+        uint8   eventId,
+        uint64  beforePtr);
+
+    /// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
+    ///     to mode TrackedCmdLocationMode::After
+    ///
+    /// @param [in] eventId     Refers to a uint8 event that has a begin and/or an end associated with it
+    ///                         Most likely, a value registered to a TrackedCmdSupportBase
+    /// @param [in] afterPtr    The end pointer for the cmdList being tracked after the event referred to by eventId
+    ///                         Only 48 bits of afterPtr are used
+    ///
+    /// @returns
+    ///     Result::ErrorInvalidPointer if (IsValid() == false)
+    ///     Result::Success if successful
+    Result SetAsAfter(
+        uint8   eventId,
+        uint64  afterPtr);
+
+    /// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
+    ///     to mode TrackedCmdLocationMode::Delta, with no begin or end (i.e., no data can be written to
+    ///     the cmdList being tracked "during" the event referred to by eventId)
+    ///
+    /// @param [in] eventId     Refers to a uint8 event that does not have a begin and/or an end associated with it
+    ///                         Such as Pal::CmdDisassembly::TrackedCmdLocation::PostClientEvent
+    /// @param [in] ptr         The end pointer for the cmdList being tracked after the event referred to by eventId
+    ///                         Only 48 bits of ptr are used
+    ///
+    /// @returns
+    ///     Result::ErrorInvalidPointer if (IsValid() == false)
+    ///     Result::Success if successful
+    Result SetAsEmptyDelta(
+        uint8   eventId,
+        uint64  ptr);
+
+    /// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
+    ///     to mode TrackedCmdLocationMode::ClientId
+    ///
+    /// @param [in] clientId    A 61-bit value used by the client application to identify which cmdList is being
+    ///                         tracked
+    ///
+    /// @returns
+    ///     Result::ErrorInvalidPointer if (IsValid() == false)
+    ///     Result::Success if successful
+    Result SetAsClientId(
+        uint64 clientId);
+
+    /// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
+    ///     to mode TrackedCmdLocationMode::ClientEventId
+    ///
+    /// @param [in] clientEventId   A 61-bit value used by the client application to identify
+    ///                             a client event relative to the current end position of the cmdList being tracked
+    ///
+    /// @returns
+    ///     Result::ErrorInvalidPointer if (IsValid() == false)
+    ///     Result::Success if successful
+    Result SetAsClientEvent(
+        uint64 clientEventId);
+
+    /// @brief  Attempts to set the referenced TrackedCmdLocation to type TrackedCmdLocationMode::Delta
+    ///
+    /// @details If GetMode() == TrackedCmdLocationMode::Before and afterPtr - m_correlateInternal.m_ptr is small
+    ///         enough to be encoded in m_correlateInternal.m_deltaInDWords, the mode will be altered to
+    ///         TrackedCmdLocationMode::Delta, with afterPtr - m_correlateInternal.m_ptr encoded in
+    ///         m_correlateInternal.m_deltaInDWords.
+    ///         If this attempt fails, the calling function should instead create a TrackedCmdLocationMode::After
+    ///         TrackedCmdLocation
+    ///
+    /// @param  [in] afterPtr   The value a TrackedCmdLocationMode::After would have for m_correlateInternal.m_ptr
+    ///
+    /// @return Result::Success if it was possible to set this TrackedCmdLocation to type
+    ///             TrackedCmdLocationMode::Delta
+    ///         Result::Unsupported if the conditions described above are not met.
+    Result TrySetAsDelta(
+        uint64 afterPtr);
+
+private:
+    TrackedCmdLocationArray*    m_pSourceArray; ///< Array that holds the referenced TrackedCmdLocation.
+    Util::uint32                m_index;        ///< Index of the referenced TrackedCmdLocation in m_pSourceArray.
+
+    /// Internal helper taking the TrackedCmdLocationMode to apply; defined out of line.
+    Result SetMode(
+        TrackedCmdLocationMode mode);
+};
+
+/// @brief  struct TrackingEventInfo
+///     A name paired with a boolean that records whether the name is valid / has been set
+struct TrackingEventInfo
+{
+    Util::StringView<char>  name;       ///< Name of the event.
+    bool                    isValid;    ///< Whether name is valid / has been set.
+
+    /// A newly constructed entry is marked as not valid.
+    TrackingEventInfo()
+        :
+        isValid{false}
+    {
+    }
+};
+
+/**
+************************************************************************************************************************
+* @brief    class TrackedCmdSupportBase translates eventId's to strings for internal correlation events
+*
+* @detail   Holds one TrackingEventInfo slot per possible uint8 eventId. For use in Pal::Queue when dumping to text
+*           files. Corresponds to TrackedCmdLocation::m_correlateInternal.m_event for the cases where
+*           TrackedCmdLocation::m_mode is not TrackedCmdLocationMode::ClientEvent
+*
+*           The implementation for this is in whatever client of Pal that is creating the internal correlation events.
+*
+************************************************************************************************************************
+*/
+class TrackedCmdSupportBase
+{
+public:
+    virtual ~TrackedCmdSupportBase() = default;
+
+    void SetEventIdName(                    // Stores name in the slot for eventId and marks that slot valid
+        uint8           eventId,
+        const char*     name)
+    {
+        PAL_ASSERT(static_cast<uint32>(eventId) < NumUInt8Values);
+        m_allEventsMap[eventId].isValid = true;
+        m_allEventsMap[eventId].name    = name;
+    }
+
+    const TrackingEventInfo& GetEventInfo(  // Slot for eventId; its isValid stays false until a name is set
+        uint8 eventId) const
+    {
+        PAL_ASSERT(static_cast<uint32>(eventId) < NumUInt8Values);
+        return m_allEventsMap[eventId];
+    }
+
+protected:
+    static constexpr uint32 NumUInt8Values = UINT8_MAX + 1;     // One table slot per possible uint8 value
+
+    TrackingEventInfo m_allEventsMap[NumUInt8Values];           // Indexed directly by eventId
+
+    TrackedCmdSupportBase() = default;
+};
+
+/**
+************************************************************************************************************************
+* @brief    class TrackedCmdLocationArray is simply a TrackedCmdLocationVec together with a clientId
+*           and some helpers. TrackedCmdLocationArray instances live on Pal::GfxCmdBuffer
+*
+* @detail Each Pal::GfxCmdBuffer has at most CmdDisassembly::MaxNumSubCmdBuffers TrackedCmdLocationArray's
+*       corresponding to Pal::GfxCmdBuffer::NumCmdStreams();
+*
+*       The clientId used for TrackedCmdLocationArray::m_clientId corresponds to the client Id used in
+*       TrackedCmdLocation::m_clientId.m_clientId
+*
+*       For the moment, the underlying implementation used is
+*       Util::Vector<TrackedCmdLocation, DefaultCapacity, Pal::Platform>, but could be changed to use a Chunk
+*       scheme, especially as sizes of cmdLists can become very large.
+*       The only requirement for such a change is that TrackedCmdLocationRef continues to function as an accessor
+*
+*       Note that the functions in TrackedCmdLocationArray are not designed for thread-safety, as they are
+*       issued from command-list-building functions that are, in their turn, not thread safe. Adding mutex
+*       behavior here would potentially hide issues relating to thread-safety.
+*
+************************************************************************************************************************
+*/
+class TrackedCmdLocationArray
+{
+public:
+    static constexpr uint32 DefaultCapacity = 1024;         // Capacity template argument of TrackedCmdLocationVec
+    static constexpr uint32 BadIndex        = UINT32_MAX;   // Index Reset() gives to the "last location" reference
+    static constexpr uint64 InvalidClientId = UINT64_MAX;   // Client id stored by Reset()
+
+    using TrackedCmdLocationVec = Util::Vector<TrackedCmdLocation, DefaultCapacity, Pal::Platform>;
+
+    static uint32 GetTrackedCmdLocationArraySizeInBytes()   // sizeof this class; presumably what pMemory must hold
+    {
+        return static_cast<uint32>(sizeof(TrackedCmdLocationArray));
+    }
+
+    static TrackedCmdLocationArray* CreateTrackedCmdLocationArray(
+        void*           pMemory,
+        Pal::Platform*  pPlatform);
+
+    void Reset()    // Points the last location at BadIndex, invalidates the client id and clears m_locations
+    {
+        m_lastLocation = TrackedCmdLocationRef(this, BadIndex);
+        m_clientId     = InvalidClientId;
+        m_locations.Clear();
+    }
+
+    void Destroy();
+
+    uint64 GetClientId() const
+    {
+        return m_clientId;
+    }
+
+    Result SetClientId(
+        uint64 clientId);
+
+    Util::uint32 GetTotalSize() const                       // Whatever the underlying vector reports as its size()
+    {
+        return m_locations.size();
+    }
+
+    const TrackedCmdLocationVec& GetLocationsVec() const    // Read-only access to the stored locations
+    {
+        return m_locations;
+    }
+
+    TrackedCmdLocationVec& UseLocationsVec()                // Mutable access to the stored locations
+    {
+        return m_locations;
+    }
+
+    Pal::Result MakeNext(
+        TrackedCmdLocationRef* pResult);
+
+    const TrackedCmdLocationRef GetLast() const             // Copy of the current "last location" reference
+    {
+        return m_lastLocation;
+    }
+
+    bool IsLast(
+        const TrackedCmdLocationRef& location) const
+    {
+        return location == m_lastLocation;
+    }
+
+private:
+    TrackedCmdLocationVec   m_locations;
+    Pal::Platform*          m_pPlatform;
+    uint64                  m_clientId;
+    TrackedCmdLocationRef   m_lastLocation;
+
+    TrackedCmdLocationArray(
+        Pal::Platform* pPlatform);
+
+    ~TrackedCmdLocationArray() = default;
+};
+
+} // namespace CmdDisassembly
+} // namespace Pal
@@ -0,0 +1,70 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palDestroyable.h
+ * @brief Defines the Platform Abstraction Library (PAL) IDestroyable interface.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+namespace Pal
+{
+
+/**
+ ***********************************************************************************************************************
+ * @interface IDestroyable
+ * @brief     Interface inherited by objects that must be explicitly destroyed by the client.
+ *
+ * This includes all objects except:
+ *
+ * + @ref IColorTargetView, @ref IDepthStencilView - These classes are treated as SRDs by the DX12 runtime.  Therefore,
+ *   PAL guarantees that no action needs to be taken at Destroy() - the client should just free the memory backing these
+ *   classes.
+ * + @ref IDevice - These objects are created during IPlatform::EnumerateDevices() and are automatically destroyed
+ *   along with the Platform object.
+ * + @ref IPrivateScreen - These objects are created during IPlatform::EnumerateDevices() based on
+ *   which screens are attached to each device.  They are automatically destroyed along with the Platform object.
+ ***********************************************************************************************************************
+ */
+class IDestroyable
+{
+public:
+    /// Frees all resources associated with this object.
+    ///
+    /// It is the client's responsibility to only call this method once there are no more existing references to this
+    /// object.  This method does not free the system memory associated with the object (as specified in pPlacementAddr
+    /// during creation); the client is responsible for freeing that memory since they allocated it.
+    virtual void Destroy() = 0;
+
+protected:
+    /// @internal Destructor.  Protected to prevent use of the delete operator on this interface.  Client must destroy
+    /// objects by explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated
+    /// for the object on their own.
+    virtual ~IDestroyable() { }
+};
+
+} // Pal
@@ -0,0 +1,626 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2016-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palDeveloperHooks.h
+ * @brief Common include for PAL developer callbacks. Defines common enums, typedefs, structures, etc.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palCmdBuffer.h"
+
+namespace Pal
+{
+
+// Forward declarations.
+class ICmdBuffer;
+class IImage;
+class IPipeline;
+
+namespace Developer
+{
+
+/// The type of the developer callback so the callback can properly perform whatever actions it needs.
+/// Enumerator values are also used as bit positions in the callback-type masks (e.g. DefaultDisabledCallbackTypes).
+/// @see Callback
+enum class CallbackType : uint32
+{
+    AllocGpuMemory = 0,     ///< This callback is to inform that GPU memory has been allocated.
+    FreeGpuMemory,          ///< This callback is to inform that GPU memory has been freed.
+    PresentConcluded,       ///< This callback is to inform that a present has concluded.
+    ImageBarrier,           ///< This callback is to inform that a barrier is being executed.
+    CreateImage,            ///< This callback is to inform that an image has been created.
+    BarrierBegin,           ///< This callback is to inform that a barrier is about to be executed.
+    BarrierEnd,             ///< This callback is to inform that a barrier is done being executed.
+    DrawDispatch,           ///< This callback is to inform that a draw or dispatch command is being recorded.
+    BindPipeline,           ///< This callback is to inform that a pipeline (client or internal) has been bound.
+    SurfRegData,            ///< This callback is to inform tools of the register state of a surface.
+#if PAL_DEVELOPER_BUILD
+    DrawDispatchValidation, ///< This callback is to describe the state validation needed by a draw or dispatch.
+    BindPipelineValidation, ///< This callback is to describe the state validation needed by a pipeline bind.
+    OptimizedRegisters,     ///< This callback is to describe the PM4 optimizer's removal of redundant register
+                            ///  sets.
+#endif
+    BindGpuMemory,          ///< This callback is to inform of a new binding to GPU memory.
+    SubAllocGpuMemory,      ///< This callback is to inform of suballocation from base GPU memory allocation.
+    SubFreeGpuMemory,       ///< This callback is to inform that GPU memory suballocation has been freed.
+#if PAL_DEVELOPER_BUILD
+    RpmBlt,                 ///< This callback is to describe the internal RPM blt calls.
+#endif
+    Count,                  ///< The number of callback types; varies with PAL_DEVELOPER_BUILD.
+};
+/// Bitmask covering all callback types (Util::BitfieldGenMask applied to CallbackType::Count).
+constexpr uint32 AllCallbackTypesMask = Util::BitfieldGenMask(static_cast<uint32>(CallbackType::Count));
+/// Bits of the GPU memory bind/suballocation callbacks.  Unsigned literals keep the shifts free of signed arithmetic.
+constexpr uint32 DefaultDisabledCallbackTypes = (1u << static_cast<uint32>(CallbackType::BindGpuMemory))     |
+                                                (1u << static_cast<uint32>(CallbackType::SubAllocGpuMemory)) |
+                                                (1u << static_cast<uint32>(CallbackType::SubFreeGpuMemory));
+/// Every bit of AllCallbackTypesMask that is not in DefaultDisabledCallbackTypes.
+constexpr uint32 DefaultEnabledCallbackTypes = AllCallbackTypesMask & ~DefaultDisabledCallbackTypes;
+
+/// Definition for developer callback.
+///
+/// @param [in] pPrivateData    Private data that is installed with the callback for use by the installer.
+/// @param [in] deviceIndex     Unique index for the device so that the installer can properly dispatch the event.
+/// @param [in] type            Information about the callback so the installer can make informed decisions about
+///                             what actions to perform.
+/// @param [in] pCbData         Additional data related to the particular callback type.
+typedef void (PAL_STDCALL *Callback)(
+    void*           pPrivateData,
+    const uint32    deviceIndex,
+    CallbackType    type,
+    void*           pCbData);
+
+/// Enumeration describing the different ways GPU memory is allocated (reported in GpuMemoryData::allocMethod).
+enum class GpuMemoryAllocationMethod : uint32
+{
+    Unassigned = 0,                         ///< Unassigned allocation method.
+    Normal,                                 ///< Virtual memory allocation (not pinned/peer).
+    Pinned,                                 ///< Pinned memory allocation.
+    Peer,                                   ///< Peer memory allocation.
+    MultiDevice,                            ///< MultiDevice memory allocation.
+    Opened,                                 ///< Shared memory allocation.
+    Svm,                                    ///< Shared virtual memory allocation.
+};
+
+/// Enumeration describing the different presentation modes an application can take.
+enum class PresentModeType : uint32
+{
+    Unknown = 0,                            ///< When the present mode is not known.
+    Flip,                                   ///< When the presentation surface is used directly as the front buffer.
+    Composite,                              ///< When the flipped image is drawn by a window compositor instead
+                                            ///  of the application.
+    Blit,                                   ///< When the presentation surface is copied to the front buffer.
+};
+
+/// Information about the presentation mode an application is in.
+struct PresentationModeData
+{
+    PresentModeType presentationMode;       ///< The present mode, as one of the PresentModeType values.
+    UniquePresentKey presentKey;            ///< Identifies the window/swap chain, etc. used to present.
+};
+
+/// Information for allocation/deallocation of GPU memory.
+struct GpuMemoryData
+{
+    gpusize size;                           ///< Size, in bytes, of the allocation.
+    GpuHeap heap;                           ///< The first requested heap of the allocation.
+
+    /// Allocation description flags (eight 1-bit flags plus 24 reserved bits).
+    struct Flags
+    {
+        uint32 isClient         :  1;       ///< This allocation is requested by the client.
+        uint32 isFlippable      :  1;       ///< This allocation is marked as flippable.
+        uint32 isUdmaBuffer     :  1;       ///< This allocation is for a UDMA buffer.
+        uint32 isVirtual        :  1;       ///< This allocation is for virtual memory.
+        uint32 isCmdAllocator   :  1;       ///< This allocation is for a CmdAllocator.
+        uint32 isExternal       :  1;       ///< This allocation is marked as external.
+        uint32 buddyAllocated   :  1;       ///< This allocation is buddy allocated.
+        uint32 appRequested     :  1;       ///< This allocation is Pal internal, but application requested.
+        uint32 reserved         : 24;       ///< Reserved for future use.
+    } flags;                                ///< Flags describing the allocation.
+
+    GpuMemoryAllocationMethod allocMethod;  ///< Allocation method.
+    const IGpuMemory*         pGpuMemory;   ///< Handle to the Pal::IGpuMemory object of this GPU memory allocation.
+    gpusize                   offset;       ///< Offset, in bytes, of a suballocation within a base allocation.  For
+                                            ///  base allocations, offset is always zero.
+};
+
+#if PAL_DEVELOPER_BUILD
+/// PWS acquire point for barrier logger; stored in BarrierOperations::acquirePoint.
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 901  // Newer interface: unscoped enum with AcquirePoint-prefixed names.
+enum AcquirePoint : uint8
+{
+    AcquirePointPfp,
+    AcquirePointMe,
+    AcquirePointPreShader,
+    AcquirePointPreDepth,
+    AcquirePointPrePs,
+    AcquirePointPreColor,
+    AcquirePointEop,
+
+    AcquirePointCount
+};
+#else  // Older interface: scoped enum listing the same points in the same order.
+enum class AcquirePoint : uint8
+{
+    Pfp = 0,
+    Me,
+    PreShader,
+    PreDepth,
+    PrePs,
+    PreColor,
+    Eop, // Invalid, for internal optimization purpose.
+
+    Count
+};
+#endif // PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 901
+#endif // PAL_DEVELOPER_BUILD
+
+/// Information pertaining to the cache flush/invalidations and stalls performed during barrier execution.
+struct BarrierOperations
+{
+    union
+    {
+        struct
+        {
+            uint16 eopTsBottomOfPipe              : 1;  ///< Issue an end-of-pipe event that can be waited on.
+                                                        ///  When combined with waitOnTs, makes a full pipeline stall.
+            uint16 vsPartialFlush                 : 1;  ///< Stall at ME, waiting for all prior VS waves to complete.
+            uint16 psPartialFlush                 : 1;  ///< Stall at ME, waiting for all prior PS waves to complete.
+            uint16 csPartialFlush                 : 1;  ///< Stall at ME, waiting for all prior CS waves to complete.
+            uint16 pfpSyncMe                      : 1;  ///< Stall PFP until ME is at same point in command stream.
+                                                        ///  Cache flushes/invalidations are in the caches bitfield.
+            uint16 syncCpDma                      : 1;  ///< Issue dummy cpDma command to confirm all prior cpDmas have
+                                                        ///  completed.
+            uint16 eosTsPsDone                    : 1;  ///< Issue an end-of-pixel-shader event that can be waited on.
+            uint16 eosTsCsDone                    : 1;  ///< Issue an end-of-compute-shader event that can be waited on.
+            uint16 waitOnTs                       : 1;  ///< Wait on a timestamp event (EOP or EOS) at the ME.
+                                                        ///  Which event is not necessarily specified here, though any
+                                                        ///  that are specified here would be waited on.
+            uint16 reserved                       : 7;  ///< Reserved for future use.
+        };
+
+        uint16 u16All;  ///< Unsigned integer containing all the values.
+
+    } pipelineStalls; ///< Information about pipeline stalls performed.
+
+    union
+    {
+        struct
+        {
+            uint16 depthStencilExpand      : 1; ///< Decompression of depth/stencil image.
+            uint16 htileHiZRangeExpand     : 1; ///< Expansion of HTile's HiZ range.
+            uint16 depthStencilResummarize : 1; ///< Resummarization of depth stencil.
+            uint16 dccDecompress           : 1; ///< DCC decompress BLT for color images.
+            uint16 fmaskDecompress         : 1; ///< Fmask decompression for shader readability.
+            uint16 fastClearEliminate      : 1; ///< Expand latest specified clear color into pixel data for the fast
+                                                ///  cleared color/depth resource.
+            uint16 fmaskColorExpand        : 1; ///< Completely decompresses the specified color resource.
+            uint16 initMaskRam             : 1; ///< Memsets uninitialized memory to prepare it for use as
+                                                ///  CMask/FMask/DCC/HTile.
+            uint16 updateDccStateMetadata  : 1; ///< DCC state metadata was updated.
+            uint16 reserved                : 7; ///< Reserved for future use.
+        };
+
+        uint16 u16All; ///< Unsigned integer containing all the values.
+
+    } layoutTransitions; ///< Information about layout transitions performed.
+
+    union
+    {
+        struct
+        {
+            uint16 invalTcp         : 1; ///< Invalidate vector caches.
+            uint16 invalSqI$        : 1; ///< Invalidate the SQ instruction caches.
+            uint16 invalSqK$        : 1; ///< Invalidate the SQ constant caches (scalar caches).
+            uint16 flushTcc         : 1; ///< Flush L2 cache.
+            uint16 invalTcc         : 1; ///< Invalidate L2 cache.
+            uint16 flushCb          : 1; ///< Flush CB caches.
+            uint16 invalCb          : 1; ///< Invalidate CB caches.
+            uint16 flushDb          : 1; ///< Flush DB caches.
+            uint16 invalDb          : 1; ///< Invalidate DB caches.
+            uint16 invalCbMetadata  : 1; ///< Invalidate CB meta-data cache.
+            uint16 flushCbMetadata  : 1; ///< Flush CB meta-data cache.
+            uint16 invalDbMetadata  : 1; ///< Invalidate DB meta-data cache.
+            uint16 flushDbMetadata  : 1; ///< Flush DB meta-data cache.
+            uint16 invalTccMetadata : 1; ///< Invalidate L2 meta-data cache (also called the GLM).
+            uint16 invalGl1         : 1; ///< Invalidate the global L1 cache.
+            uint16 placeholder      : 1; ///< Reserved for future use.
+        };
+
+        uint16 u16All; ///< Unsigned integer containing all the values.
+
+    } caches; ///< Information about cache operations performed for the barrier.
+
+#if PAL_DEVELOPER_BUILD
+    AcquirePoint acquirePoint;  ///< PWS acquire point; this member exists only in developer builds.
+#endif
+};
+
+/// Enumeration for PAL barrier reasons
+enum BarrierReason : uint32
+{
+    BarrierReasonInvalid = 0,                               ///< Invalid barrier reason
+
+    BarrierReasonFirst   = 0x80000000,                      ///< The first valid barrier reason value
+                                                            ///  The only value that can be smaller than this is the
+                                                            ///  invalid value.
+    BarrierReasonLast    = 0xbfffffff,                      ///< The last valid barrier reason value
+                                                            ///  The only value that can be larger than this is the
+                                                            ///  unknown value.
+
+    BarrierReasonPreComputeColorClear = BarrierReasonFirst, ///< Barrier issued before a color clear
+    BarrierReasonPostComputeColorClear,                     ///< Barrier issued after a color clear
+    BarrierReasonPreComputeDepthStencilClear,               ///< Barrier issued before a depth/stencil clear
+    BarrierReasonPostComputeDepthStencilClear,              ///< Barrier issued after a depth/stencil clear
+    BarrierReasonMlaaResolveEdgeSync,                       ///< Barrier issued to sync mlaa edge calculations
+    BarrierReasonAqlWaitForParentKernel,                    ///< Barrier issued to wait for the parent kernel to
+                                                            ///  complete in an AQL submission
+    BarrierReasonAqlWaitForChildrenKernels,                 ///< Barrier issued to wait for the children kernels to
+                                                            ///  complete in an AQL submission
+    BarrierReasonP2PBlitSync,                               ///< Barrier issued to synchronize peer-to-peer blits
+    BarrierReasonTimeGraphGrid,                             ///< Barrier issued to wait for the time graph grid
+    BarrierReasonTimeGraphGpuLine,                          ///< Barrier issued to wait for the time graph gpu line
+    BarrierReasonDebugOverlayText,                          ///< Barrier issued to wait for the debug overlay text
+    BarrierReasonDebugOverlayGraph,                         ///< Barrier issued to wait for the debug overlay graph
+    BarrierReasonDevDriverOverlay,                          ///< Barrier issued to wait for developer driver overlay
+    BarrierReasonDmaImgScanlineCopySync,                    ///< Barrier issued to synchronize between image scanline
+                                                            ///  copies on the dma hardware
+    BarrierReasonPostSqttTrace,                             ///< Barrier issued to wait for work from an sqtt trace
+    BarrierReasonPrePerfDataCopy,                           ///< Barrier issued to wait for perf data to become
+                                                            ///  available for copy
+    BarrierReasonFlushL2CachedData,                         ///< Barrier issued to flush L2 cached data to main memory
+    BarrierReasonResolveImage,                              ///< Barrier issued before and after resolve image shader
+    BarrierReasonPerPixelCopy,                              ///< Barrier issued between CS copy and per-pixel copy steps
+    BarrierReasonGenerateMipmaps,                           ///< Barrier issued between generating mip levels
+
+    /// Newly defined barrier reasons should be added before this one.
+    BarrierReasonInternalLastDefined,                       ///< Only used for asserts.
+    BarrierReasonUnknown = 0xFFFFFFFF,                      ///< Unknown barrier reason
+
+    /// Aliases of the color clear reasons, kept for backwards compatibility
+    BarrierReasonPreSyncClear  = BarrierReasonPreComputeColorClear,
+    BarrierReasonPostSyncClear = BarrierReasonPostComputeColorClear
+};
+
+/// Style of barrier (reported in BarrierData::type).
+enum class BarrierType : uint32
+{
+    Full = 0, ///< A traditional blocking barrier.
+    Release,  ///< A pipelined barrier that flushes caches and starts transitions.
+    Acquire,  ///< A barrier that waits on previous 'Release' barriers.
+
+    Count     ///< The number of barrier types.
+};
+
+/// Information for barrier executions.
+struct BarrierData
+{
+    ICmdBuffer*       pCmdBuffer;    ///< The command buffer that is executing the barrier.
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 902  // The type of 'transition' depends on the client interface version.
+    ImgBarrier        transition;    ///< The particular image barrier with layout transition blt that is currently
+                                     ///  executing, only used during a CallbackType::ImageBarrier.
+#else
+    BarrierTransition transition;    ///< The particular transition with layout transition blt that is currently
+                                     ///  executing, only used during a CallbackType::ImageBarrier.
+#endif
+    bool              hasTransition; ///< Whether or not the transition structure is populated.
+    BarrierOperations operations;    ///< Detailed cache and pipeline operations performed during this barrier execution.
+    uint32            reason;        ///< Reason that the barrier was invoked. Only filled at BarrierBegin.
+    BarrierType       type;          ///< What style of barrier this is. Only filled at BarrierBegin.
+};
+
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 888  // The Gfx6 tiling types below exist only for older client interfaces.
+/// Enumeration describing the different types of tile mode dimensions
+enum class Gfx6ImageTileModeDimension : uint32
+{
+    Linear = 0, ///< Linear tile mode.
+    Dim1d,      ///< 1D tile mode.
+    Dim2d,      ///< 2D tile mode.
+    Dim3d,      ///< 3D tile mode.
+};
+
+/// Tile mode information: a dimensionality plus a bitfield of properties.
+struct Gfx6ImageTileMode
+{
+    Gfx6ImageTileModeDimension dimension;   ///< Dimensionality of tile mode.
+
+    union
+    {
+        struct
+        {
+            uint32 prt       : 1;   ///< Image is a PRT.
+            uint32 thin      : 1;   ///< Thin tiled.
+            uint32 thick     : 1;   ///< Thick tiled.
+            uint32 reserved  : 29;  ///< Reserved for future use.
+        };
+        uint32 u32All;              ///< Flags packed as 32-bit uint.
+    } properties;                   ///< Bitfield of properties
+};
+
+/// Enumeration describing the different tile types
+enum class Gfx6ImageTileType : uint32
+{
+    Displayable = 0,    ///< Displayable tiling.
+    NonDisplayable,     ///< Non-displayable tiling.
+    DepthSampleOrder,   ///< Same as non-displayable plus depth-sample-order.
+    Rotated,            ///< Rotated displayable tiling.
+    Thick,              ///< Thick micro-tiling.
+};
+#endif // PAL_CLIENT_INTERFACE_MAJOR_VERSION < 888
+
+/// Meta-data-related properties, packed as 1-bit flags in a 32-bit word.
+struct ImageMetaDataInfo
+{
+    union
+    {
+        struct
+        {
+            uint32 color                 : 1;   ///< Flag indicates this is a color buffer.
+            uint32 depth                 : 1;   ///< Flag indicates this is a depth/stencil buffer.
+            uint32 stencil               : 1;   ///< Flag indicates this is a stencil buffer.
+            uint32 texture               : 1;   ///< Flag indicates this is a texture.
+            uint32 cube                  : 1;   ///< Flag indicates this is a cubemap.
+            uint32 volume                : 1;   ///< Flag indicates this is a volume texture.
+            uint32 fmask                 : 1;   ///< Flag indicates this is an fmask.
+            uint32 compressZ             : 1;   ///< Flag indicates z buffer is compressed.
+            uint32 overlay               : 1;   ///< Flag indicates this is an overlay surface.
+            uint32 noStencil             : 1;   ///< Flag indicates this depth has no separate stencil.
+            uint32 display               : 1;   ///< Flag indicates this should match display controller requirements.
+            uint32 opt4Space             : 1;   ///< Flag indicates this surface should be optimized for space
+                                                ///  i.e. save some memory but may lose performance.
+            uint32 prt                   : 1;   ///< Flag for partially resident texture.
+            uint32 tcCompatible          : 1;   ///< Image's metadata is TC-compatible.  This reduces the maximum
+                                                ///  compression levels, but allows the shader to read the data without
+                                                ///  an expensive decompress operation.
+            uint32 dccCompatible         : 1;   ///< GFX 8: whether to make MSAA surface support dcc fast clear.
+            uint32 dccPipeWorkaround     : 1;   ///< GFX 8: whether to workaround the HW limit that
+                                                ///  dcc can't be enabled if pipe config of tile mode
+                                                ///  is different from that of ASIC.
+            uint32 disableLinearOpt      : 1;   ///< Disable tile mode optimization to linear.
+            uint32 reserved              : 15;  ///< Reserved for future use.
+        };
+        uint32 u32All;              ///< Flags packed as 32-bit uint.
+    } properties;                   ///< Bitfield of properties
+};
+
+/// Information for allocation of a PAL Image - AddrLib surface info.
+struct ImageDataAddrMgrSurfInfo
+{
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 888  // The tiling union exists only for older client interfaces.
+    union
+    {
+        struct
+        {
+            Gfx6ImageTileMode mode; ///< Tile mode.
+            Gfx6ImageTileType type; ///< Micro tiling type.
+        } gfx6;                     ///< Tiling described with the Gfx6 tile mode/type types.
+        struct
+        {
+            uint32 swizzle;         ///< Swizzle mode.
+        } gfx9;                     ///< Tiling described with a swizzle mode.
+    } tiling;                       ///< Tiling information.
+#endif
+
+    ImageMetaDataInfo flags;    ///< Metadata info.
+    uint32            swizzle;  ///< HW-specific swizzle mode.
+    uint64            size;     ///< Surface size, in bytes.
+    uint32            bpp;      ///< Bits per pixel.
+    uint32            width;    ///< Width.
+    uint32            height;   ///< Height.
+    uint32            depth;    ///< Depth.
+};
+
+/// Type of surface for which the register data is being provided.
+enum class SurfRegDataType : uint32
+{
+    RenderTargetView,   ///< Render Target View information.
+};
+
+/// Information for surface addresses for a SurfRegData callback.
+struct SurfRegDataInfo
+{
+    SurfRegDataType type;    ///< Type of surface to which the register data corresponds.
+    uint32          regData; ///< Hardware-specific register data for the specific surface type.
+};
+
+/// Type of draw or dispatch operation for a DrawDispatch callback.
+enum class DrawDispatchType : uint32
+{
+    CmdDraw = 0,                       ///< Auto-indexed draw.
+    CmdDrawOpaque,                     ///< Auto draw.
+    CmdDrawIndexed,                    ///< Indexed draw.
+    CmdDrawIndirectMulti,              ///< (Multi) indirect draw.
+    CmdDrawIndexedIndirectMulti,       ///< (Multi) indirect indexed draw.
+    CmdDispatchMesh,                   ///< Task/Mesh shader dispatch.
+    CmdDispatchMeshIndirectMulti,      ///< Indirect Task/Mesh shader dispatch.
+    CmdGenExecuteIndirectDraw,         ///< ExecuteIndirect draw.
+    CmdGenExecuteIndirectDrawIndexed,  ///< ExecuteIndirect indexed draw.
+    CmdGenExecuteIndirectDispatchMesh, ///< ExecuteIndirect Task/Mesh shader dispatch.
+    CmdDispatch,                       ///< Direct compute dispatch.
+    CmdDispatchAce,                    ///< Direct Compute dispatch through implicit ganged-submit ACE stream.
+    CmdDispatchIndirect,               ///< Indirect compute dispatch.
+    CmdDispatchOffset,                 ///< Direct compute dispatch (offsetted start).
+    CmdGenExecuteIndirectDispatch,     ///< ExecuteIndirect dispatch.
+    CmdDispatchAql,                    ///< AQL compute dispatch.
+
+    Count,                             ///< Number of draw/dispatch command types listed above.
+    FirstDispatch = CmdDispatch        ///< Callbacks with an enum value greater than or equal to this are dispatches.
+};
+
+/// Draw-specific information for DrawDispatch callbacks.
+struct DrawDispatchDrawArgs
+{
+    /// Contains information about user data register indices for certain draw parameter state.
+    /// Some of these values may not be available for all draws on all clients, and in such
+    /// cases the value will be UINT_MAX.
+    struct
+    {
+        uint32 firstVertex;    ///< Vertex offset (first vertex) user data register index.
+        uint32 instanceOffset; ///< Instance offset (start instance) user data register index.
+        uint32 drawIndex;      ///< Draw ID SPI user data register index.
+    } userDataRegs;
+};
+
+/// Dispatch-specific information for DrawDispatch callbacks.
+struct DrawDispatchDispatchArgs
+{
+    DispatchDims groupStart;  ///< Thread/workgroup start offsets in X/Y/Z dimensions. Only valid for CmdDispatchOffset.
+    DispatchDims groupDims;   ///< Thread/workgroup counts in X/Y/Z dimensions. Only valid for CmdDispatch[Offset].
+    DispatchDims logicalSize; ///< Thread/workgroup counts as seen by the shader. Only valid for CmdDispatchOffset.
+    /// Optional flags to help the client driver understand the dispatch.
+    /// For example, if the dispatch originated in PAL rather than the client driver.
+    DispatchInfoFlags infoFlags;
+};
+
+/// Information for DrawDispatch callbacks.
+struct DrawDispatchData
+{
+    ICmdBuffer*      pCmdBuffer; ///< The command buffer that is recording this command.
+    DrawDispatchType cmdType;    ///< Draw/dispatch command type.  This influences which sub-structure below is valid.
+
+    union
+    {
+        /// Draw-specific parameters.  Valid when cmdType is CmdDraw*.
+        DrawDispatchDrawArgs draw;
+
+        /// Dispatch-specific parameters.  Valid when cmdType is CmdDispatch*.
+        DrawDispatchDispatchArgs dispatch;
+    };
+
+    /// If the handler of this callback inserts an RGP trace marker using ICmdBuffer::CmdInsertRgpTraceMarker(),
+    /// these flags should be passed to that call to control which sub-queue(s) in the command buffer should insert
+    /// the marker.
+    RgpMarkerSubQueueFlags subQueueFlags;
+};
+
+/// Information for BindPipeline callbacks.
+struct BindPipelineData
+{
+    const IPipeline*    pPipeline;      ///< The currently-bound pipeline.
+    const PipelineInfo* pPipelineInfo;  ///< General information about the bound pipeline.
+    ICmdBuffer*         pCmdBuffer;     ///< The command buffer that is recording this command.
+    uint64              apiPsoHash;     ///< The hash to correlate APIs and corresponding PSOs.
+    PipelineBindPoint   bindPoint;      ///< The bind point of the pipeline within a queue.
+
+    /// If the handler of this callback inserts an RGP trace marker using ICmdBuffer::CmdInsertRgpTraceMarker(),
+    /// these flags should be passed to that call to control which sub-queue(s) in the command buffer should insert
+    /// the marker.
+    RgpMarkerSubQueueFlags subQueueFlags;
+};
+
+#if PAL_DEVELOPER_BUILD
+/// Information for DrawDispatchValidation callbacks.
+struct DrawDispatchValidationData
+{
+    ICmdBuffer* pCmdBuffer;         ///< The command buffer which is recording the triggering draw or dispatch.
+    uint32      userDataCmdSize;    ///< Size of PM4 commands used to validate the current user-data entries (bytes).
+    uint32      miscCmdSize;        ///< Size of PM4 commands for all other draw- or dispatch-time validation (bytes).
+};
+
+/// Information for BindPipelineValidation callbacks.
+struct BindPipelineValidationData
+{
+    ICmdBuffer* pCmdBuffer;         ///< The command buffer which is recording the triggering draw or dispatch.
+    uint32      pipelineCmdSize;    ///< Size of PM4 commands used to validate the current pipeline state (bytes).
+};
+
+/// Information for OptimizedRegisters callbacks.
+struct OptimizedRegistersData
+{
+    ICmdBuffer*   pCmdBuffer;       ///< The command buffer which is recording the triggering PM4 stream.
+    /// Array containing the number of times the PM4 optimizer saw a SET packet which modified each register.
+    const uint32* pShRegSeenSets;
+    /// Array containing the number of times the PM4 optimizer kept a SET packet which modified each register.
+    const uint32* pShRegKeptSets;
+    uint32        shRegCount;       ///< Number of SH registers.
+    uint16        shRegBase;        ///< Base address of SH registers.
+    /// Array containing the number of times the PM4 optimizer saw a SET or RMW packet which modified each register.
+    const uint32* pCtxRegSeenSets;
+    /// Array containing the number of times the PM4 optimizer kept a SET or RMW packet which modified each register.
+    const uint32* pCtxRegKeptSets;
+    uint32        ctxRegCount;      ///< Number of context registers.
+    uint16        ctxRegBase;       ///< Base address of context registers.
+};
+
+/// Internal RPM blt type.
+enum class RpmBltType : uint32
+{
+    CpDmaCopy = 0,
+    CpDmaUpdate,
+    Draw,
+    Dispatch,
+
+    Count               ///< Number of RPM blt types listed above.
+};
+
+/// Describes the RPM blt call.
+struct RpmBltData
+{
+    ICmdBuffer* pCmdBuffer;    ///< The command buffer that is executing the blt.
+    RpmBltType  bltType;       ///< Type of RPM blt, @ref RpmBltType.
+};
+#endif
+
+/// Describes the binding of a GPU Memory object to a resource.
+struct BindGpuMemoryData
+{
+    const void*         pObj;               ///< Opaque pointer to the resource having memory bound to it.
+    gpusize             requiredGpuMemSize; ///< GPU memory size required by pObj.
+    const IGpuMemory*   pGpuMemory;         ///< IGpuMemory object being bound to the resource.
+    gpusize             offset;             ///< Offset within pGpuMemory where the resource is being bound.
+    bool                isSystemMemory;     ///< If true then system memory is being bound to the object. In this case,
+                                            ///  pGpuMemory should be null and offset should be zero.
+};
+
+/// Describes a user marker operation.
+enum class UserMarkerOpType : uint8
+{
+    Invalid = 0,        ///< Invalid user marker operation.
+    Push,               ///< Push user marker operation.
+    Pop,                ///< Pop user marker operation.
+    Set                 ///< Set user marker operation.
+};
+
+/// Describes a user marker operation, used in UserMarkerHistoryTraceSource.
+struct UserMarkerOpInfo
+{
+    union
+    {
+        struct
+        {
+            uint32 opType   : 2;    ///< Operation kind, a @ref UserMarkerOpType value.
+            uint32 strIndex : 30;   ///< Index of the user marker in the corresponding string table.
+        };
+        uint32 u32All;              ///< Both fields packed as a 32-bit uint.
+    };
+};
+
+} // Developer
+} // Pal
@@ -0,0 +1,171 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palFence.h
+ * @brief Defines the Platform Abstraction Library (PAL) IFence interface and related types.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palDestroyable.h"
+
+namespace Pal
+{
+
+/// Specifies properties for @ref IFence creation. Input structure to IDevice::CreateFence().
+struct FenceCreateInfo
+{
+    union
+    {
+        struct
+        {
+            uint32 signaled            : 1;  ///< Specify whether the initial status of the fence is signaled or not.
+            uint32 eventCanBeInherited : 1;  ///< The event handle can be inherited by child process.
+            uint32 shareable           : 1;  ///< This fence may be opened for use by a different device.
+            uint32 reserved            : 29; ///< Reserved for future use.
+        };
+        uint32 u32All;      ///< Flags packed as 32-bit uint.
+    } flags;                ///< Fence creation flags.
+#if defined(_WIN32)
+    const wchar_t* pName;   ///< The name of the event object. Windows uses this name to uniquely identify fence
+                            ///  objects across processes.
+#endif
+};
+
+/// Specifies properties for fence opening. Input structure to IDevice::OpenFence().
+struct FenceOpenInfo
+{
+    union
+    {
+        struct
+        {
+            uint32 isReference  : 1;    ///< If set, then the opened fence will reference the same sync object
+                                        ///  in the kernel.  Otherwise, the object is copied to the new Fence.
+            uint32 reserved     : 31;   ///< Reserved for future use.
+        };
+        uint32 u32All;                  ///< Flags packed as 32-bit uint.
+    } flags;                            ///< Fence open flags.
+
+    OsExternalHandle externalFence;     ///< External shared fence handle.
+#if defined(_WIN32)
+    const wchar_t*   pName;             ///< The name of the event object. Windows uses this name to uniquely
+                                        ///  identify fence objects across processes.
+#endif
+};
+
+/// Specifies properties for fence exporting. Input structure to IFence::ExportExternalHandle().
+struct FenceExportInfo
+{
+    union
+    {
+        struct
+        {
+            uint32 isReference     : 1;  ///< If set, then the fence exports a handle that references the same sync
+                                         ///  object in the kernel.  Otherwise, the object is copied to the new Fence.
+            uint32 implicitReset   : 1;  ///< If set, a fence reset will be done for the sync fd exported.
+            uint32 reserved        : 30; ///< Reserved for future use.
+        };
+        uint32 u32All;                  ///< Flags packed as 32-bit uint.
+    } flags;                            ///< Fence export flags.
+};
+
+/**
+ ***********************************************************************************************************************
+ * @interface IFence
+ * @brief     Represents a command buffer fence the client can use for coarse-level synchronization between the GPU and
+ *            CPU.
+ *
+ * Fences can be specified when calling IQueue::Submit() and will be signaled when certain prior queue operations have
+ * completed.  The status of the fence can be queried by the client to determine when the GPU work of interest has
+ * completed.
+ *
+ * Fences are guaranteed to wait for:
+ * + Prior command buffer submissions.
+ * + Prior queue semaphore signals and waits.
+ * + Prior direct presents.
+ *
+ * @see IDevice::CreateFence()
+ ***********************************************************************************************************************
+ */
+class IFence : public IDestroyable
+{
+public:
+    /// Gets the status (completed or not) of the fence.
+    ///
+    /// @returns Success if the fence has been reached, or NotReady if the fence hasn't been reached.  Other return
+    ///          codes indicate an error:
+    ///          + ErrorFenceNeverSubmitted if the fence hasn't been submitted yet and the fence is not created with
+    ///            @ref FenceCreateInfo::signaled set.
+    virtual Result GetStatus() const = 0;
+
+    /// Export the event handle or sync object handle of the fence for external usage.
+    /// If @ref FenceExportInfo::isReference is not set, then this also performs an implicit reset operation on
+    /// the Fence.
+    ///
+    /// @param  [in] exportInfo    Information describing how the Fence handle should be exported.
+    /// @returns the handle in the type OsExternalHandle
+    virtual OsExternalHandle ExportExternalHandle(
+        const FenceExportInfo& exportInfo) const = 0;
+
+    /// Returns the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @returns Pointer to client data.
+    void* GetClientData() const
+    {
+        return m_pClientData;
+    }
+
+    /// Sets the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @param  [in]    pClientData     A pointer to arbitrary client data.
+    void SetClientData(
+        void* pClientData)
+    {
+        m_pClientData = pClientData;
+    }
+
+protected:
+    /// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
+    /// calling the proper create method.  The client data pointer starts out as nullptr.
+    IFence() : m_pClientData(nullptr) {}
+
+    /// @internal Destructor.  Prevent use of delete operator on this interface.  Client must destroy objects by
+    /// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
+    /// object on their own.
+    virtual ~IFence() { }
+
+private:
+    /// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
+    /// and set via SetClientData().
+    /// For non-top-layer objects, this will point to the layer above the current object.
+    void* m_pClientData;
+};
+
+} // Pal
@@ -0,0 +1,506 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palFormat.h
+ * @brief Common include for the Platform Abstraction Library (PAL) interface.  Defines format types.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "palUtil.h"
+
+/// Library-wide namespace encapsulating all PAL entities.
+namespace Pal
+{
+
+/// Specifies the format for an image or GPU memory view.
+///
+/// This defines the bit layout of the channels and how the value in each channel is interpreted.
+///
+/// Channels are listed in DX10+-style order, where the least significant channels are listed first.  For example, for
+/// a uint32 val with an X8Y8Z8W8 value: X = val & 0xFF, Y = (val >> 8) & 0xFF, Z = (val >> 16) & 0xFF,
+/// W = (val >> 24) & 0xFF.  Enums without a more detailed explanation can be decoded in this manner.  Multimedia,
+/// or "YUV" formats are all exceptions to this rule.  Each of those formats explicitly describes how their channels are
+/// organized.
+///
+/// Many of the multimedia (i.e., "YUV") formats are encoded such that the chrominance (chroma, CbCr, UV) samples are
+/// stored at a lower resolution than the luminance (luma, Y) samples as a form of compression.  The ratio of the
+/// subsampling is often referred to using an A:B:C notation, where the trio of numbers A,B,C are used to describe the
+/// number of luma and chroma samples in a hypothetical region which is A pixels wide and 2 pixels high.  The three
+/// numbers usually refer to the following quantities by convention:
+///  A --> Width of the conceptual region of pixels, and is usually 4.
+///  B --> Number of chroma samples in the first of two rows of A pixels.
+///  C --> Number of changes of chroma samples between the first and second rows of A pixels.
+///
+/// Common examples of this notation are described below:
+///  4:4:4 --> No chroma subsampling because luma and chroma both have 4 samples per row, and the number of chroma and
+///            luma rows is the same.
+///  4:1:1 --> Each row of 4 pixels has 1 chroma sample, and the number of chroma and luma rows is the same.
+///  4:2:0 --> Each row of 4 pixels has 2 chroma samples, and there is only 1 chroma row for every 2 luma rows.
+///  4:2:2 --> Each row of 4 pixels has 2 chroma samples, and the number of chroma and luma rows is the same.
+///
+/// Because of the subsampling ratios for multimedia formats, there are some restrictions on what dimensions can be used
+/// when creating Images of these formats. 4:1:1 formats must have widths specified as a multiple of 4. 4:2:0 formats
+/// must have widths and heights specified as multiples of 2. 4:2:2 formats must have widths specified as a multiple of
+/// 2. 4:4:4 formats have no dimensional restrictions.
+///
+/// Additionally, the YUV formats are broadly grouped into two categories: packed and planar formats.  Packed formats
+/// interleave the luma and chroma samples in each row of pixels.  Planar formats are organized so that all of the luma
+/// samples are together, followed by all of the chroma samples.  Some planar formats interleave the U and V chroma
+/// data, while some choose to have separate U and V planes.  Both packed and planar formats can have any subsampling
+/// ratio between the luma and chroma data.
+enum class ChNumFormat : Util::uint32
+{
+    Undefined                = 0x0,     ///< Used in situations where no format is needed, like raw memory views, or to
+                                        ///  indicate no color/depth target will be attached when creating a graphics
+                                        ///  pipeline.
+    X1_Unorm                 = 0x1,     ///< _Untested._
+    X1_Uscaled               = 0x2,     ///< _Untested._
+    X4Y4_Unorm               = 0x3,
+    X4Y4_Uscaled             = 0x4,
+    L4A4_Unorm               = 0x5,
+    X4Y4Z4W4_Unorm           = 0x6,
+    X4Y4Z4W4_Uscaled         = 0x7,
+    X5Y6Z5_Unorm             = 0x8,
+    X5Y6Z5_Uscaled           = 0x9,
+    X5Y5Z5W1_Unorm           = 0xA,
+    X5Y5Z5W1_Uscaled         = 0xB,
+    X1Y5Z5W5_Unorm           = 0xC,
+    X1Y5Z5W5_Uscaled         = 0xD,
+    X8_Unorm                 = 0xE,
+    X8_Snorm                 = 0xF,
+    X8_Uscaled               = 0x10,
+    X8_Sscaled               = 0x11,
+    X8_Uint                  = 0x12,
+    X8_Sint                  = 0x13,
+    X8_Srgb                  = 0x14,
+    A8_Unorm                 = 0x15,
+    L8_Unorm                 = 0x16,
+    P8_Unorm                 = 0x17,
+    X8Y8_Unorm               = 0x18,
+    X8Y8_Snorm               = 0x19,
+    X8Y8_Uscaled             = 0x1A,
+    X8Y8_Sscaled             = 0x1B,
+    X8Y8_Uint                = 0x1C,
+    X8Y8_Sint                = 0x1D,
+    X8Y8_Srgb                = 0x1E,
+    L8A8_Unorm               = 0x1F,
+    X8Y8Z8W8_Unorm           = 0x20,
+    X8Y8Z8W8_Snorm           = 0x21,
+    X8Y8Z8W8_Uscaled         = 0x22,
+    X8Y8Z8W8_Sscaled         = 0x23,
+    X8Y8Z8W8_Uint            = 0x24,
+    X8Y8Z8W8_Sint            = 0x25,
+    X8Y8Z8W8_Srgb            = 0x26,
+    U8V8_Snorm_L8W8_Unorm    = 0x27,    ///< Mixed signed/unsigned format. Valid Image and Color-Target View formats
+                                        ///  are X8Y8Z8W8_Snorm (to target U8V8_Snorm) and X8Y8Z8W8_Unorm (to target
+                                        ///  L8W8_Unorm).
+    X10Y11Z11_Float          = 0x28,
+    X11Y11Z10_Float          = 0x29,
+    X10Y10Z10W2_Unorm        = 0x2A,
+    X10Y10Z10W2_Snorm        = 0x2B,
+    X10Y10Z10W2_Uscaled      = 0x2C,
+    X10Y10Z10W2_Sscaled      = 0x2D,
+    X10Y10Z10W2_Uint         = 0x2E,
+    X10Y10Z10W2_Sint         = 0x2F,
+    X10Y10Z10W2Bias_Unorm    = 0x30,    ///< A four-component, 32-bit 2.8-biased fixed-point format that supports 10
+                                        ///  bits for each color channel and 2-bit alpha. A shader must be aware of
+                                        ///  *Bias* and must perform its own bias and scale on any data that is read
+                                        ///  from or written.
+    U10V10W10_Snorm_A2_Unorm = 0X31,    ///< Mixed signed/unsigned format. Valid Image and Color-Target View formats
+                                        ///  are X10Y10Z10W2_Snorm (to target U10V10W10_Snorm) and X10Y10Z10W2_Unorm
+                                        ///  (to target A2_Unorm).
+    X16_Unorm                = 0x32,
+    X16_Snorm                = 0x33,
+    X16_Uscaled              = 0x34,
+    X16_Sscaled              = 0x35,
+    X16_Uint                 = 0x36,
+    X16_Sint                 = 0x37,
+    X16_Float                = 0x38,
+    L16_Unorm                = 0x39,
+    X16Y16_Unorm             = 0x3A,
+    X16Y16_Snorm             = 0x3B,
+    X16Y16_Uscaled           = 0x3C,
+    X16Y16_Sscaled           = 0x3D,
+    X16Y16_Uint              = 0x3E,
+    X16Y16_Sint              = 0x3F,
+    X16Y16_Float             = 0x40,
+    X16Y16Z16W16_Unorm       = 0x41,
+    X16Y16Z16W16_Snorm       = 0x42,
+    X16Y16Z16W16_Uscaled     = 0x43,
+    X16Y16Z16W16_Sscaled     = 0x44,
+    X16Y16Z16W16_Uint        = 0x45,
+    X16Y16Z16W16_Sint        = 0x46,
+    X16Y16Z16W16_Float       = 0x47,
+    X32_Uint                 = 0x48,
+    X32_Sint                 = 0x49,
+    X32_Float                = 0x4A,
+    X32Y32_Uint              = 0x4B,
+    X32Y32_Sint              = 0x4C,
+    X32Y32_Float             = 0x4D,
+    X32Y32Z32_Uint           = 0x4E,
+    X32Y32Z32_Sint           = 0x4F,
+    X32Y32Z32_Float          = 0x50,
+    X32Y32Z32W32_Uint        = 0x51,
+    X32Y32Z32W32_Sint        = 0x52,
+    X32Y32Z32W32_Float       = 0x53,
+    D16_Unorm_S8_Uint        = 0x54,
+    D32_Float_S8_Uint        = 0x55,
+    X9Y9Z9E5_Float           = 0x56,    ///< Three partial-precision floating-point numbers encoded into a single 32-bit
+                                        ///  value all sharing the same 5-bit exponent (variant of s10e5, which is sign
+                                        ///  bit, 10-bit mantissa, and 5-bit biased (15) exponent). There is no sign
+                                        ///  bit, and there is a shared 5-bit biased (15) exponent and a 9-bit mantissa
+                                        ///  for each channel (shared-exponent format).
+    Bc1_Unorm                = 0x57,    ///< BC1 compressed texture format.
+    Bc1_Srgb                 = 0x58,    ///< BC1 compressed texture format.
+    Bc2_Unorm                = 0x59,    ///< BC2 compressed texture format.
+    Bc2_Srgb                 = 0x5A,    ///< BC2 compressed texture format.
+    Bc3_Unorm                = 0x5B,    ///< BC3 compressed texture format.
+    Bc3_Srgb                 = 0x5C,    ///< BC3 compressed texture format.
+    Bc4_Unorm                = 0x5D,    ///< BC4 compressed texture format.
+    Bc4_Snorm                = 0x5E,    ///< BC4 compressed texture format.
+    Bc5_Unorm                = 0x5F,    ///< BC5 compressed texture format.
+    Bc5_Snorm                = 0x60,    ///< BC5 compressed texture format.
+    Bc6_Ufloat               = 0x61,    ///< BC6 unsigned compressed texture format.
+    Bc6_Sfloat               = 0x62,    ///< BC6 signed compressed texture format.
+    Bc7_Unorm                = 0x63,    ///< BC7 compressed texture format.
+    Bc7_Srgb                 = 0x64,    ///< BC7 compressed texture format.
+    Etc2X8Y8Z8_Unorm         = 0x65,
+    Etc2X8Y8Z8_Srgb          = 0x66,
+    Etc2X8Y8Z8W1_Unorm       = 0x67,
+    Etc2X8Y8Z8W1_Srgb        = 0x68,
+    Etc2X8Y8Z8W8_Unorm       = 0x69,
+    Etc2X8Y8Z8W8_Srgb        = 0x6A,
+    Etc2X11_Unorm            = 0x6B,
+    Etc2X11_Snorm            = 0x6C,
+    Etc2X11Y11_Unorm         = 0x6D,
+    Etc2X11Y11_Snorm         = 0x6E,
+    AstcLdr4x4_Unorm         = 0x6F,
+    AstcLdr4x4_Srgb          = 0x70,
+    AstcLdr5x4_Unorm         = 0x71,
+    AstcLdr5x4_Srgb          = 0x72,
+    AstcLdr5x5_Unorm         = 0x73,
+    AstcLdr5x5_Srgb          = 0x74,
+    AstcLdr6x5_Unorm         = 0x75,
+    AstcLdr6x5_Srgb          = 0x76,
+    AstcLdr6x6_Unorm         = 0x77,
+    AstcLdr6x6_Srgb          = 0x78,
+    AstcLdr8x5_Unorm         = 0x79,
+    AstcLdr8x5_Srgb          = 0x7A,
+    AstcLdr8x6_Unorm         = 0x7B,
+    AstcLdr8x6_Srgb          = 0x7C,
+    AstcLdr8x8_Unorm         = 0x7D,
+    AstcLdr8x8_Srgb          = 0x7E,
+    AstcLdr10x5_Unorm        = 0x7F,
+    AstcLdr10x5_Srgb         = 0x80,
+    AstcLdr10x6_Unorm        = 0x81,
+    AstcLdr10x6_Srgb         = 0x82,
+    AstcLdr10x8_Unorm        = 0x83,
+    AstcLdr10x8_Srgb         = 0x84,
+    AstcLdr10x10_Unorm       = 0x85,
+    AstcLdr10x10_Srgb        = 0x86,
+    AstcLdr12x10_Unorm       = 0x87,
+    AstcLdr12x10_Srgb        = 0x88,
+    AstcLdr12x12_Unorm       = 0x89,
+    AstcLdr12x12_Srgb        = 0x8A,
+    AstcHdr4x4_Float         = 0x8B,
+    AstcHdr5x4_Float         = 0x8C,
+    AstcHdr5x5_Float         = 0x8D,
+    AstcHdr6x5_Float         = 0x8E,
+    AstcHdr6x6_Float         = 0x8F,
+    AstcHdr8x5_Float         = 0x90,
+    AstcHdr8x6_Float         = 0x91,
+    AstcHdr8x8_Float         = 0x92,
+    AstcHdr10x5_Float        = 0x93,
+    AstcHdr10x6_Float        = 0x94,
+    AstcHdr10x8_Float        = 0x95,
+    AstcHdr10x10_Float       = 0x96,
+    AstcHdr12x10_Float       = 0x97,
+    AstcHdr12x12_Float       = 0x98,
+    X8Y8_Z8Y8_Unorm          = 0x99,    ///< _Untested._
+    X8Y8_Z8Y8_Uscaled        = 0x9A,    ///< _Untested._
+    Y8X8_Y8Z8_Unorm          = 0x9B,    ///< _Untested._
+    Y8X8_Y8Z8_Uscaled        = 0x9C,    ///< _Untested._
+    AYUV                     = 0x9D,    ///< YUV 4:4:4 packed format.  Valid Image and Color-Target view formats are
+                                        ///  { X8Y8Z8W8, Unorm } and { X8Y8Z8W8, Uint }.  Each view fully maps the
+                                        ///  entire YUV subresource, with the V,U,Y,A channels mapped to the X,Y,Z,W
+                                        ///  channels respectively.  Additionally, Image views can use the { X32, Uint }
+                                        ///  format where all four channels are packed into a single uint32.
+    UYVY                     = 0x9E,    ///< YUV 4:2:2 packed format.  The Image data is subsampled such that each 32bit
+                                        ///  element contains two Y samples and one U and V sample.  Valid Image view
+                                        ///  formats are { X8Y8Z8W8, Unorm } and { X8Y8Z8W8, Uint }.  Each view fully
+                                        ///  maps the entire YUV subresource, with the X,Y,Z,W channels mapped to the
+                                        ///  U0,Y0,V0,Y1 channels respectively. Additionally, Image views can use the
+                                        ///  { X32, Uint } format where all four channels are packed into a single
+                                        ///  uint32. Image views can also use the { X8Y8_Z8Y8, Unorm } format to access
+                                        ///  these as well. In this case, the width of the Image view would appear to be
+                                        ///  twice as wide as it normally does, and the X0,Y0,Z0,Y1 channels map to the
+                                        ///  U0,Y0,V0,Y1 channels respectively.
+    VYUY                     = 0x9F,    ///< YUV 4:2:2 packed format.  The image data is encoded just like the
+                                        ///  @ref ChNumFormat::UYVY format, except with a different channel ordering.
+                                        ///  Image views with X8Y8Z8W8 channel formats map the X,Y,Z,W channels to the
+                                        ///  V0,Y0,U0,Y1 channels respectively. Image views with the X8Y8_Z8Y8 channel
+                                        ///  format map the X0,Y0,Z0,Y1 channels to the V0,Y0,U0,Y1 channels
+                                        ///  respectively.
+    YUY2                     = 0xA0,    ///< YUV 4:2:2 packed format.  The image data is encoded just like the
+                                        ///  @ref ChNumFormat::UYVY format, except with a different channel ordering.
+                                        ///  X8Y8Z8W8 Image view formats map the X,Y,Z,W channels to the Y0,U0,Y1,V0
+                                        ///  channels respectively. Image views can use the { Y8X8_Y8Z8, Unorm } format
+                                        ///  where the Y0,X0,Y1,Z0 channels are mapped to the Y0,U0,Y1,V0 channels.
+    YVY2                     = 0xA1,    ///< YUV 4:2:2 packed format.  The image data is encoded just like the
+                                        ///  @ref ChNumFormat::YUY2 format, except with a different channel ordering.
+                                        ///  X8Y8Z8W8 Image view formats map the X,Y,Z,W channels to the Y0,V0,Y1,U0
+                                        ///  channels respectively. Image views can use the { Y8X8_Y8Z8, Unorm } format
+                                        ///  where the Y0,X0,Y1,Z0 channels are mapped to the Y0,V0,Y1,U0 channels.
+    YV12                     = 0xA2,    ///< YVU 4:2:0 planar format, with 8 bits per luma and chroma sample.  The Y
+                                        ///  plane is first, containing a uint8 per sample.  Next is the V plane and the U
+                                        ///  plane, both of which have a uint8 per sample.  Valid Image view formats are
+                                        ///  { X8, Unorm } and { X8, Uint }.  Each view only has access to one of the Y,
+                                        ///  V, or U planes.
+    NV11                     = 0xA3,    ///< YUV 4:1:1 planar format, with 8 bits per luma and chroma sample.  The Y
+                                        ///  plane is first, containing a uint8 per sample.  Next is a UV plane which
+                                        ///  has interleaved U and V samples, each stored as a uint8.  Valid Image and
+                                        ///  Color-Target view formats are { X8, Unorm }, { X8, Uint }, { X8Y8, Unorm }
+                                        ///  and { X8Y8, Uint }.  When using an X8 channel format for the View, the view
+                                        ///  only has access to the Y plane.  When using X8Y8, the view only has access
+                                        ///  to the UV plane.
+    NV12                     = 0xA4,    ///< YUV 4:2:0 planar format, with 8 bits per luma and chroma sample.  The Y
+                                        ///  plane is first, containing a uint8 per sample.  Next is a UV plane which
+                                        ///  has interleaved U and V samples, each stored as a uint8.  Valid Image and
+                                        ///  Color-Target view formats are { X8, Unorm }, { X8, Uint }, { X8Y8, Unorm }
+                                        ///  and { X8Y8, Uint }.  When using an X8 channel format for the View, the view
+                                        ///  only has access to the Y plane.  When using X8Y8, the view only has access
+                                        ///  to the UV plane.
+    NV21                     = 0xA5,    ///< YUV 4:2:0 planar format, with 8 bits per luma and chroma sample.  This is
+                                        ///  identical to @ref ChNumFormat::NV12, except that the second plane swaps the
+                                        ///  ordering of the U and V samples. Image views behave just like with
+                                        ///  @ref ChNumFormat::NV12.
+    P016                     = 0xA6,    ///< YUV 4:2:0 planar format, with 16 bits per luma and chroma sample.  The
+                                        ///  plane ordering is identical to @ref ChNumFormat::NV12.  Instead of uint8
+                                        ///  samples, this format uses 8.8 fixed point sample encoding.  Image views
+                                        ///  behave just like with @ref ChNumFormat::NV12, except R16 channel formats
+                                        ///  are used for the Y plane, and X16Y16 channel formats are used for the UV
+                                        ///  plane.
+    P010                     = 0xA7,    ///< YUV 4:2:0 planar format, with 10 bits per luma and chroma sample.  This is
+                                        ///  identical to @ref ChNumFormat::P016, except that the lowest 6 bits of each
+                                        ///  luma and chroma sample are ignored. This allows the source data to be
+                                        ///  interpreted as either P016 or P010 interchangeably.
+    P210                     = 0xA8,    ///< YUV 4:2:2 planar format, with 10 bits per luma and chroma sample. This is
+                                        ///  similar to @ref ChNumFormat::P010, except that the UV planes are sub-sampled
+                                        ///  only in the horizontal direction, but still by a factor of 2 so the UV plane
+                                        ///  ends up having the same number of lines as the Y plane.
+    X8_MM_Unorm              = 0xA9,    ///< Multi-media format used with DCC for non-interleaved planes in YUV planar
+                                        ///  surfaces. Such as the Y plane or any plane in YV12.
+    X8_MM_Uint               = 0xAA,    ///< Multi-media format used with DCC for non-interleaved planes in YUV planar
+                                        ///  surfaces. Such as the Y plane or any plane in YV12.
+    X8Y8_MM_Unorm            = 0xAB,    ///< Multi-media format used with DCC for the interleaved UV plane in YUV planar
+                                        ///  surfaces.
+    X8Y8_MM_Uint             = 0xAC,    ///< Multi-media format used with DCC for the interleaved UV plane in YUV planar
+                                        ///  surfaces.
+    X16_MM10_Unorm           = 0xAD,    ///< Multi-media format used with DCC for non-interleaved planes in YUV planar
+                                        ///  surfaces (10-bit). Such as the Y plane or any plane in YV12.
+    X16_MM10_Uint            = 0xAE,    ///< Multi-media format used with DCC for non-interleaved planes in YUV planar
+                                        ///  surfaces (10-bit). Such as the Y plane or any plane in YV12.
+    X16Y16_MM10_Unorm        = 0xAF,    ///< Multi-media format used with DCC for the interleaved UV plane in YUV planar
+                                        ///  surfaces (10-bit).
+    X16Y16_MM10_Uint         = 0xB0,    ///< Multi-media format used with DCC for the interleaved UV plane in YUV planar
+                                        ///  surfaces (10-bit).
+    P208                     = 0xB1,    ///< YUV 4:2:2 planar format, with 8 bits per luma and chroma sample. This is
+                                        ///  similar to @ref ChNumFormat::NV12, except that the UV planes are sub-sampled
+                                        ///  only in the horizontal direction, but still by a factor of 2 so the UV plane
+                                        ///  ends up having the same number of lines as the Y plane. This format is
+                                        ///  sometimes referred to as NV16.
+    X16_MM12_Unorm           = 0xB2,    ///< Multi-media format used with DCC for non-interleaved planes in YUV planar
+                                        ///  surfaces (12-bit).
+    X16_MM12_Uint            = 0xB3,    ///< Multi-media format used with DCC for non-interleaved planes in YUV planar
+                                        ///  surfaces (12-bit).
+    X16Y16_MM12_Unorm        = 0xB4,    ///< Multi-media format used with DCC for the interleaved UV plane in YUV planar
+                                        ///  surfaces (12-bit).
+    X16Y16_MM12_Uint         = 0xB5,    ///< Multi-media format used with DCC for the interleaved UV plane in YUV planar
+                                        ///  surfaces (12-bit).
+    P012                     = 0xB6,    ///< YUV 4:2:0 planar format, with 12 bits per luma and chroma sample.  This is
+                                        ///  identical to @ref ChNumFormat::P010, except that the lowest 4 bits of each
+                                        ///  luma and chroma sample are ignored.
+    P212                     = 0xB7,    ///< YUV 4:2:2 planar format, with 12 bits per luma and chroma sample.  This is
+                                        ///  identical to @ref ChNumFormat::P210, except that the lowest 4 bits of each
+                                        ///  luma and chroma sample are ignored.
+    P412                     = 0xB8,    ///< YUV 4:4:4 planar format, with 12 bits per luma and chroma sample. It consists
+                                        ///  of a Y-plane followed by an interleaved UV plane.
+    X10Y10Z10W2_Float        = 0xB9,    ///< RGBA format with three 10-bit floats (6e4) and a 2-bit unorm as alpha.
+    Y216                     = 0xBA,    ///< YUV 4:2:2 packed, with 16 bits per luma or chroma sample. No alpha.
+    Y210                     = 0xBB,    ///< YUV 4:2:2 packed, with 10 bits per luma or chroma sample. No alpha.
+                                        ///  Same memory layout as @ref ChNumFormat::Y216.
+                                        ///  The lowest 6 bits of each sample are ignored.
+    Y416                     = 0xBC,    ///< YUV 4:4:4 packed, with 16 bits per luma or chroma sample.
+    Y410                     = 0xBD,    ///< YUV 4:4:4 packed, with 10 bits per luma or chroma sample and 2 bits for alpha.
+    _ReservedBE              = 0xBE,
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 923
+    P216                     = 0xBF,    ///< YUV 4:2:2 planar format, with 16 bits per luma and chroma sample. It consists
+                                        ///  of a Y-plane followed by interleaved UV plane.
+#endif
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 924
+    YUV_420P10               = 0xC0,    ///< YUV 4:2:0 tri-planar format, with 10 bits per luma and chroma sample.
+    YUV_422P10               = 0xC1,    ///< YUV 4:2:2 tri-planar format, with 10 bits per luma and chroma sample.
+    YUV_444P10               = 0xC2,    ///< YUV 4:4:4 tri-planar format, with 10 bits per luma and chroma sample.
+    YUV_420P12               = 0xC3,    ///< YUV 4:2:0 tri-planar format, with 12 bits per luma and chroma sample.
+    YUV_422P12               = 0xC4,    ///< YUV 4:2:2 tri-planar format, with 12 bits per luma and chroma sample.
+    YUV_444P12               = 0xC5,    ///< YUV 4:4:4 tri-planar format, with 12 bits per luma and chroma sample.
+    YUV_420P16               = 0xC6,    ///< YUV 4:2:0 tri-planar format, with 16 bits per luma and chroma sample.
+    YUV_422P16               = 0xC7,    ///< YUV 4:2:2 tri-planar format, with 16 bits per luma and chroma sample.
+    YUV_444P16               = 0xC8,    ///< YUV 4:4:4 tri-planar format, with 16 bits per luma and chroma sample.
+#endif
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 925
+    YV16                     = 0xC9,    ///< YVU 4:2:2 tri-planar format, with 8 bits per luma and chroma sample. This
+                                        ///  is similar to @ref ChNumFormat::YV12, except chroma is not subsampled in
+                                        ///  vertical direction.
+    YV24                     = 0xCA,    ///< YVU 4:4:4 tri-planar format, with 8 bits per luma and chroma sample. This
+                                        ///  is similar to @ref ChNumFormat::YV12, except chroma is not subsampled.
+    NV24                     = 0xCB,    ///< YUV 4:4:4 bi-planar format, with 8 bits per luma and chroma sample. This
+                                        ///  is similar to @ref ChNumFormat::NV12, except chroma is not subsampled.
+                                        ///  This format is sometimes referred to as P408.
+    P410                     = 0xCC,    ///< YUV 4:4:4 planar format, with 10 bits per luma and chroma sample. It consists
+                                        ///  of a Y-plane followed by interleaved UV plane.
+    P416                     = 0xCD,    ///< YUV 4:4:4 planar format, with 16 bits per luma and chroma sample. It consists
+                                        ///  of a Y-plane followed by interleaved UV plane.
+#endif
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 929
+    X16Y16Z16W16_MM10_Unorm  = 0xCE,    ///< A four component format with 16 bits of storage per component. Lowest 6 bits of
+                                        ///  each component are ignored.
+    X16Y16Z16W16_MM10_Uint   = 0xCF,    ///< A four component format with 16 bits of storage per component. Lowest 6 bits of
+                                        ///  each component are ignored.
+    X16Y16Z16W16_MM12_Unorm  = 0xD0,    ///< A four component format with 16 bits of storage per component. Lowest 4 bits of
+                                        ///  each component are ignored.
+    X16Y16Z16W16_MM12_Uint   = 0xD1,    ///< A four component format with 16 bits of storage per component. Lowest 4 bits of
+                                        ///  each component are ignored.
+#endif
+    Count,
+
+};
+
+/// Specifies which channel of a resource should be mapped to a particular component of an image view.
+///
+/// The first two values are constant swizzles that ignore resource data; the remaining values select one of the
+/// resource's X, Y, Z or W channels.
+///
+/// @ingroup ResourceBinding
+enum class ChannelSwizzle : Util::uint8
+{
+    Zero = 0x0,  ///< Ignore resource data and always fetch a 0 into this component.
+    One  = 0x1,  ///< Ignore resource data and always fetch a 1 into this component.
+    X    = 0x2,  ///< Use the X channel from resource for this component.
+    Y    = 0x3,  ///< Use the Y channel from resource for this component.
+    Z    = 0x4,  ///< Use the Z channel from resource for this component.
+    W    = 0x5,  ///< Use the W channel from resource for this component.
+    Count        ///< Number of swizzle values defined above (evaluates to 0x6).
+};
+
+/// Specifies a mapping for each component of an image or buffer view to a channel in its associated resource.
+///
+/// The three union members are alternative views of the same storage: named per-component fields, an indexable
+/// array, and a single 32-bit value.
+///
+/// @ingroup ResourceBinding
+struct ChannelMapping
+{
+    union
+    {
+        struct
+        {
+            ChannelSwizzle r;          ///< Red component swizzle.
+            ChannelSwizzle g;          ///< Green component swizzle.
+            ChannelSwizzle b;          ///< Blue component swizzle.
+            ChannelSwizzle a;          ///< Alpha component swizzle.
+        };
+        ChannelSwizzle     swizzle[4]; ///< All four swizzles packed into one array.
+        Util::uint32       swizzleValue; ///< All four swizzles viewed as one 32-bit value; this is the member
+                                         ///  compared by operator== on SwizzledFormat.
+    };
+};
+
+/// Specifies a pixel format for an image or memory view and its corresponding channel swizzle.
+/// Two values compare equal (see operator== for SwizzledFormat) when both the format and the packed swizzle value
+/// match.
+struct SwizzledFormat
+{
+    ChNumFormat    format;  ///< Pixel format.
+    ChannelMapping swizzle; ///< Compatible channel swizzle for the above pixel format.
+};
+
+/// Equality comparison for @ref SwizzledFormat.
+///
+/// @param [in] lhs Left hand side of comparison.
+/// @param [in] rhs Right hand side of comparison.
+///
+/// @returns True if both the pixel format and the packed swizzle value match. False otherwise.
+inline constexpr bool operator==(const SwizzledFormat& lhs, const SwizzledFormat& rhs)
+{
+    // The packed swizzles are only compared once the formats are known to match.
+    return (lhs.format != rhs.format) ? false : (lhs.swizzle.swizzleValue == rhs.swizzle.swizzleValue);
+}
+
+/// Constant for undefined formats.  Pairs @ref ChNumFormat::Undefined with an (X, Zero, Zero, One) swizzle for the
+/// (r, g, b, a) components.
+constexpr SwizzledFormat UndefinedSwizzledFormat =
+{
+    ChNumFormat::Undefined,
+    // Brace nesting: ChannelMapping -> its anonymous union -> the anonymous { r, g, b, a } struct.
+    { { { ChannelSwizzle::X, ChannelSwizzle::Zero, ChannelSwizzle::Zero, ChannelSwizzle::One } } },
+};
+
+/// Bitmask flags reporting available capabilities of a particular format.  Each enumerator is a distinct bit so
+/// values can be combined; see @ref MergedFormatPropertiesTable for where they are stored.
+///
+/// NOTE(review): FormatFeatureFormatConversionSrc and FormatFeatureFormatConversionDst are documented below as aliased
+/// to each other, yet they are assigned distinct bits (0x10000 and 0x20000) - confirm what the notes intend.
+enum FormatFeatureFlags : Util::uint32
+{
+    FormatFeatureCopy                = 0x00001,  ///< Images of this format can be used as a copy source or destination.
+    FormatFeatureFormatConversion    = 0x00002,  ///< Images of this format support format conversion in copy
+                                                 ///  operations.
+    FormatFeatureImageShaderRead     = 0x00004,  ///< Images of this format can be read from a shader.
+    FormatFeatureImageShaderWrite    = 0x00008,  ///< Images of this format can be written from a shader.
+    FormatFeatureImageShaderAtomics  = 0x00010,  ///< Images of this format can be written atomically from a shader.
+    FormatFeatureMemoryShaderRead    = 0x00020,  ///< Memory views of this format can be read from a shader.
+    FormatFeatureMemoryShaderWrite   = 0x00040,  ///< Memory views of this format can be written from a shader.
+    FormatFeatureMemoryShaderAtomics = 0x00080,  ///< Memory views of this format can be written atomically from a
+                                                 ///  shader.
+    FormatFeatureColorTargetWrite    = 0x00100,  ///< Images of this format can be bound as a color target.
+    FormatFeatureColorTargetBlend    = 0x00200,  ///< Images of this format can be bound as a color target for blending.
+    FormatFeatureDepthTarget         = 0x00400,  ///< Images of this format can be bound as a depth target.
+    FormatFeatureStencilTarget       = 0x00800,  ///< Images of this format can be bound as a stencil target.
+    FormatFeatureMsaaTarget          = 0x01000,  ///< Images of this format can support multisampling.
+    FormatFeatureWindowedPresent     = 0x02000,  ///< Images of this format can support windowed-mode presents.
+                                                 ///  Fullscreen present capability is queried using the @ref
+                                                 ///  IScreen::GetScreenModeList method.
+    FormatFeatureImageFilterLinear   = 0x04000,  ///< Images of this format can be linearly filtered.
+    FormatFeatureImageFilterMinMax   = 0x08000,  ///< Images of this format can be min/max filtered.
+    FormatFeatureFormatConversionSrc = 0x10000,  ///< Images of this format support format conversion in copy
+                                                 ///  operations as the source image.
+                                                 ///  @note This is aliased to FormatFeatureFormatConversionDst for
+                                                 ///  backwards compatibility.
+    FormatFeatureFormatConversionDst = 0x20000,  ///< Images of this format support format conversion in copy
+                                                 ///  operations as the destination image.
+                                                 ///  @note This is aliased to FormatFeatureFormatConversionSrc for
+                                                 ///  backwards compatibility.
+};
+
+/// Enumeration for indexing into the format properties table based on tiling.  Used as the second dimension of
+/// MergedFormatPropertiesTable::features.
+enum FormatPropertiesTiling : Util::uint32
+{
+    IsLinear  = 0,  ///< Format properties requested are for linearly-tiled surfaces.
+    IsNonLinear,    ///< Format properties requested are for non-linearly tiled surfaces.
+    Count,          ///< Number of format property tile types.
+};
+
+/// The format properties lookup table.  Contains information about which device access features are available for all
+/// formats and tiling modes.  The tiling features for non-linear tiling modes are identical so we only store linear
+/// and non-linear tiling features.  From left to right, it is indexed by format and "is-non-linear".
+/// Returned by IDevice::GetFormatProperties().
+struct MergedFormatPropertiesTable
+{
+    /// Feature flags indexed as features[format][tiling], where tiling is a @ref FormatPropertiesTiling value.
+    FormatFeatureFlags features[static_cast<size_t>(ChNumFormat::Count)][FormatPropertiesTiling::Count];
+};
+
+} // Pal
@@ -0,0 +1,881 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palFormatInfo.h
+ * @brief Defines the Platform Abstraction Library (PAL) Format utility functions.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "palDevice.h"
+#include "palImage.h"
+#include "palInlineFuncs.h"
+#include "palMath.h"
+
+namespace Pal
+{
+
+/// Namespace encapsulating all PAL format utility functions.
+namespace Formats
+{
+
+/// Specifies flags which indicate properties of each PAL channel format.  These bits are combined into
+/// FormatInfo::properties and tested by helpers such as IsYuvPlanar() and IsYuvPacked().
+enum PropertyFlags : uint32
+{
+    BitCountInaccurate = 0x1,   ///< Indicates that format's bit count array is inaccurate
+    BlockCompressed    = 0x2,   ///< Indicates channel format is block-compressed
+    MacroPixelPacked   = 0x4,   ///< Indicates channel format has multiple pixels' data packed together into
+                                ///  one "macro pixel"
+    YuvPlanar          = 0x8,   ///< Indicates channel format is YUV-planar
+    YuvPacked          = 0x10,  ///< Indicates channel format is YUV packed
+};
+
+/// Specifies numeric support of a specified format.
+///
+/// Despite the "Flags" suffix the enumerators are sequential values rather than bits: a single value is stored in
+/// FormatInfo::numericSupport, and the Is*() helpers test it with an equality comparison.
+enum class NumericSupportFlags : uint32
+{
+    Undefined,      ///< No numeric support.
+    Unorm,          ///< Unsigned normalized.
+    Snorm,          ///< Signed normalized.
+    Uscaled,        ///< _Untested._ Treated as an unsigned integer inside the resource, but received by
+                    ///  the shader as a floating point number.
+    Sscaled,        ///< _Untested._ Treated as a signed integer inside the resource, but received by
+                    ///  the shader as a floating point number.
+    Uint,           ///< Unsigned integer.
+    Sint,           ///< Signed integer.
+    Float,          ///< Floating point number.
+    Srgb,           ///< sRGB.
+    DepthStencil,   ///< Depth/stencil support.
+    Yuv,            ///< YUV support.
+};
+
+/// Specifies flags which indicate the presence of each color channel in a PAL channel format.  Combined into
+/// FormatInfo::channelMask; ComponentMask() asserts that no bits outside these four are set.
+enum ChannelFlags : uint32
+{
+    X     = 0x1,    ///< Indicates the X channel is present.
+    Y     = 0x2,    ///< Indicates the Y channel is present.
+    Z     = 0x4,    ///< Indicates the Z channel is present.
+    W     = 0x8,    ///< Indicates the W channel is present.
+};
+
+/// An entry in the channel-format info lookup table. Contains intrinsic properties describing a channel format.
+struct FormatInfo
+{
+    uint32              bitsPerPixel;    ///< Total count of bits in a single pixel (or block).
+    uint32              componentCount;  ///< Number of color components (channels) present.
+
+    uint32              bitCount[4];     ///< Number of bits for each component in the format. These members are
+                                         ///  only reliable if the @ref BitCountInaccurate flag is not set.
+                                         ///  Listed in order: X, Y, Z, and W.
+
+    uint32              channelMask;     ///< Mask of @ref ChannelFlags values indicating which channels are present.
+    uint32              properties;      ///< Mask of @ref PropertyFlags values indicating which properties a format
+                                         ///  has.
+    NumericSupportFlags numericSupport;  ///< Which numeric format this format represents. Used for easy identification.
+};
+
+/// BC block dimension (4x4): the edge length of one BC-compressed block.
+static constexpr uint32 CompressedBcBlockDim = 4;
+
+/// ETC block dimension (4x4): the edge length of one ETC-compressed block.
+static constexpr uint32 CompressedEtcBlockDim = 4;
+
+/// Lookup table for intrinsic properties describing each channel format. Callers should access the members of this
+/// table via BitsPerPixel() and related functions.  The table has one entry per @ref ChNumFormat value and is indexed
+/// by the format cast to size_t.  It is only declared here; the definition lives elsewhere.
+extern const FormatInfo FormatInfoTable[static_cast<size_t>(ChNumFormat::Count)];
+
+/// Convert a floating-point representation of a color value in RGBA order to the appropriate bit representation for
+/// each channel based on the specified format. Swizzling is enabled by default to maintain backwards compatibility.
+/// There will be no swizzling functionality going forwards.
+///
+/// @param [in]  format    Format (and swizzle) describing how to encode the color.
+/// @param [in]  pColorIn  Floating-point color in RGBA order (presumably four elements - confirm against the
+///                        implementation).
+/// @param [out] pColorOut Receives the converted per-channel bit representation.
+extern void ConvertColor(
+    SwizzledFormat format,
+    const float*   pColorIn,
+    uint32*        pColorOut);
+
+/// Convert an unsigned integer representation of a color value in YUVA order to the appropriate bit representation for
+/// each channel based on the specified format.
+///
+/// @param [in]  format    Format (and swizzle) describing how to encode the color.
+/// @param [in]  plane     Plane selector (presumably the index of the YUV plane being converted - confirm against
+///                        the implementation).
+/// @param [in]  pColorIn  Unsigned integer color in YUVA order.
+/// @param [out] pColorOut Receives the converted per-channel bit representation.
+extern void ConvertYuvColor(
+    SwizzledFormat format,
+    uint32         plane,
+    const uint32*  pColorIn,
+    uint32*        pColorOut);
+
+/// Packs a clear color value in RGBA order to a single element of the provided format and stores it in the
+/// memory provided. Swizzling is enabled by default to maintain backwards compatibility. There will be
+/// no swizzling functionality going forwards.
+///
+/// @param [in]  format        Format (and swizzle) of the element to produce.
+/// @param [in]  pColor        Clear color in RGBA order.
+/// @param [out] pBufferMemory Destination memory that receives the packed element (presumably it must be large enough
+///                            for one element of the format - confirm against the implementation).
+extern void PackRawClearColor(
+    SwizzledFormat format,
+    const uint32*  pColor,
+    void*          pBufferMemory);
+
+/// Swizzles the color according to the provided format swizzle.
+///
+/// @param [in]  format    Format whose swizzle is applied.
+/// @param [in]  pColorIn  Color to swizzle.
+/// @param [out] pColorOut Receives the swizzled color.
+extern void SwizzleColor(SwizzledFormat format, const uint32* pColorIn, uint32* pColorOut);
+
+/// Tests two SwizzledFormats for equality.
+///
+/// @param [in] lhs Left hand side of comparison.
+/// @param [in] rhs Right hand side of comparison.
+///
+/// @returns True if both the pixel format and the packed swizzle value match. False otherwise.
+constexpr bool IsSameFormat(
+    const SwizzledFormat& lhs,
+    const SwizzledFormat& rhs)
+{
+    // The packed swizzles are only compared once the formats are known to match.
+    return (lhs.format != rhs.format) ? false : (lhs.swizzle.swizzleValue == rhs.swizzle.swizzleValue);
+}
+
+/// Looks up how many components (channels) a channel format has.
+///
+/// @param [in] format The channel format to query for.
+///
+/// @returns The componentCount recorded for the format in @ref FormatInfoTable.
+inline uint32 NumComponents(
+    ChNumFormat format)
+{
+    const FormatInfo& info = FormatInfoTable[static_cast<size_t>(format)];
+    return info.componentCount;
+}
+
+/// Looks up which channels are present in a format.
+///
+/// @param [in] format The format to query for.
+///
+/// @returns The mask of @ref ChannelFlags recorded for the format in @ref FormatInfoTable.
+inline uint32 ComponentMask(
+    ChNumFormat format)
+{
+    const FormatInfo& info = FormatInfoTable[static_cast<size_t>(format)];
+    const uint32      mask = info.channelMask;
+
+    // Only the four ChannelFlags bits (X, Y, Z, W) are expected to be set.
+    PAL_ASSERT((mask & 0xF) == mask);
+    return mask;
+}
+
+/// Checks if a format is undefined.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns True if the pixel format is undefined. False otherwise.
+constexpr bool IsUndefined(
+    ChNumFormat format)
+{
+    return (format == ChNumFormat::Undefined);
+}
+
+/// Tests whether a format is recorded as unsigned normalized in @ref FormatInfoTable.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns True if the pixel format is unsigned normalized. False otherwise.
+inline bool IsUnorm(
+    ChNumFormat format)
+{
+    const NumericSupportFlags numeric = FormatInfoTable[static_cast<size_t>(format)].numericSupport;
+    return (numeric == NumericSupportFlags::Unorm);
+}
+
+/// Tests whether a format is recorded as signed normalized in @ref FormatInfoTable.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns True if the pixel format is signed normalized. False otherwise.
+inline bool IsSnorm(
+    ChNumFormat format)
+{
+    const NumericSupportFlags numeric = FormatInfoTable[static_cast<size_t>(format)].numericSupport;
+    return (numeric == NumericSupportFlags::Snorm);
+}
+
+/// Tests whether a format is recorded as unsigned scaled in @ref FormatInfoTable.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns True if the pixel format is unsigned scaled. False otherwise.
+inline bool IsUscaled(
+    ChNumFormat format)
+{
+    const NumericSupportFlags numeric = FormatInfoTable[static_cast<size_t>(format)].numericSupport;
+    return (numeric == NumericSupportFlags::Uscaled);
+}
+
+/// Tests whether a format is recorded as signed scaled in @ref FormatInfoTable.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns True if the pixel format is signed scaled. False otherwise.
+inline bool IsSscaled(
+    ChNumFormat format)
+{
+    const NumericSupportFlags numeric = FormatInfoTable[static_cast<size_t>(format)].numericSupport;
+    return (numeric == NumericSupportFlags::Sscaled);
+}
+
+/// Tests whether a format is recorded as unsigned integer in @ref FormatInfoTable.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns True if the pixel format is unsigned integer. False otherwise.
+inline bool IsUint(
+    ChNumFormat format)
+{
+    const NumericSupportFlags numeric = FormatInfoTable[static_cast<size_t>(format)].numericSupport;
+    return (numeric == NumericSupportFlags::Uint);
+}
+
+/// Tests whether a format is recorded as signed integer in @ref FormatInfoTable.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns True if the pixel format is signed integer. False otherwise.
+inline bool IsSint(
+    ChNumFormat format)
+{
+    const NumericSupportFlags numeric = FormatInfoTable[static_cast<size_t>(format)].numericSupport;
+    return (numeric == NumericSupportFlags::Sint);
+}
+
+/// Tests whether a format is recorded as floating point in @ref FormatInfoTable.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns True if the pixel format is floating point. False otherwise.
+inline bool IsFloat(
+    ChNumFormat format)
+{
+    const NumericSupportFlags numeric = FormatInfoTable[static_cast<size_t>(format)].numericSupport;
+    return (numeric == NumericSupportFlags::Float);
+}
+
+/// Tests whether a format is recorded as gamma-corrected sRGB in @ref FormatInfoTable.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns True if the pixel format is sRGB. False otherwise.
+inline bool IsSrgb(
+    ChNumFormat format)
+{
+    const NumericSupportFlags numeric = FormatInfoTable[static_cast<size_t>(format)].numericSupport;
+    return (numeric == NumericSupportFlags::Srgb);
+}
+
+/// Tests whether a format is normalized, i.e. either unsigned normalized or signed normalized.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns True if the pixel format is normalized. False otherwise.
+inline bool IsNormalized(
+    ChNumFormat format)
+{
+    if (IsUnorm(format))
+    {
+        return true;
+    }
+
+    return IsSnorm(format);
+}
+
+/// Tests whether a format is an integer format, i.e. either unsigned integer or signed integer.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns True if the pixel format is an integer format. False otherwise.
+inline bool IsInteger(
+    ChNumFormat format)
+{
+    if (IsUint(format))
+    {
+        return true;
+    }
+
+    return IsSint(format);
+}
+
+/// Tests whether a format is recorded as depth/stencil in @ref FormatInfoTable.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns True if the pixel format is a depth/stencil only format. False otherwise.
+inline bool IsDepthStencilOnly(
+    ChNumFormat format)
+{
+    const NumericSupportFlags numeric = FormatInfoTable[static_cast<size_t>(format)].numericSupport;
+    return (numeric == NumericSupportFlags::DepthStencil);
+}
+
+/// Tests whether the @ref YuvPlanar property bit is set for a format in @ref FormatInfoTable.
+///
+/// @param [in] format The format to check.
+///
+/// @returns True if the specified format is YUV-planar. False otherwise.
+inline bool IsYuvPlanar(
+    ChNumFormat format)
+{
+    const uint32 props = FormatInfoTable[static_cast<size_t>(format)].properties;
+    return ((props & YuvPlanar) != 0);
+}
+
+/// Tests whether the @ref YuvPacked property bit is set for a format in @ref FormatInfoTable.
+///
+/// @param [in] format The format to check.
+///
+/// @returns True if the specified format is YUV-packed. False otherwise.
+inline bool IsYuvPacked(
+    ChNumFormat format)
+{
+    const uint32 props = FormatInfoTable[static_cast<size_t>(format)].properties;
+    return ((props & YuvPacked) != 0);
+}
+
+/// Tests whether a format holds YUV data.
+///
+/// @param [in] format The format to test.
+///
+/// @returns True if the format's numeric support is Yuv. False otherwise.
+inline bool IsYuv(
+    ChNumFormat format)
+{
+    const FormatInfo& info = FormatInfoTable[static_cast<size_t>(format)];
+
+    return (info.numericSupport == NumericSupportFlags::Yuv);
+}
+
+/// Tests whether a swizzled format carries alpha.
+///
+/// Alpha is reported for the A8, L4A4 and L8A8 Unorm formats, for any format whose component mask contains W, and for
+/// any swizzle whose alpha channel is sourced from something other than the constants Zero or One.
+///
+/// @param [in] format Pixel format together with its channel swizzle.
+///
+/// @returns True if the pixel format has an alpha channel. False otherwise.
+constexpr bool HasAlpha(
+    SwizzledFormat format)
+{
+    // Kept as a single return expression so the function stays usable in constant expressions.
+    return ((format.format == ChNumFormat::A8_Unorm)                ||
+            (format.format == ChNumFormat::L4A4_Unorm)              ||
+            (format.format == ChNumFormat::L8A8_Unorm)              ||
+            ((ComponentMask(format.format) & ChannelFlags::W) != 0) ||
+            (((format.swizzle.a == ChannelSwizzle::Zero) || (format.swizzle.a == ChannelSwizzle::One)) == false));
+}
+
+/// Tests whether a four-component format never reads its W component.
+///
+/// @param [in] format Pixel format together with its channel swizzle.
+///
+/// @returns True if the pixel format has four components and none of the r/g/b/a swizzles selects W. False otherwise.
+inline bool HasUnusedAlpha(
+    SwizzledFormat format)
+{
+    // Only four-component formats can have an unused fourth channel.
+    if (NumComponents(format.format) != 4)
+    {
+        return false;
+    }
+
+    const auto& swizzle = format.swizzle;
+
+    const bool wIsSelected = (swizzle.r == ChannelSwizzle::W) ||
+                             (swizzle.g == ChannelSwizzle::W) ||
+                             (swizzle.b == ChannelSwizzle::W) ||
+                             (swizzle.a == ChannelSwizzle::W);
+
+    return (wIsSelected == false);
+}
+
+/// Converts format into its Unorm equivalent.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns Unorm format equivalent of input format. Undefined if none exist.
+extern ChNumFormat PAL_STDCALL ConvertToUnorm(ChNumFormat format);
+
+/// Converts format into its Snorm equivalent.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns Snorm format equivalent of input format. Undefined if none exist.
+extern ChNumFormat PAL_STDCALL ConvertToSnorm(ChNumFormat format);
+
+/// Converts format into its Uscaled equivalent.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns Uscaled format equivalent of input format. Undefined if none exist.
+extern ChNumFormat PAL_STDCALL ConvertToUscaled(ChNumFormat format);
+
+/// Converts format into its Sscaled equivalent.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns Sscaled format equivalent of input format. Undefined if none exist.
+extern ChNumFormat PAL_STDCALL ConvertToSscaled(ChNumFormat format);
+
+/// Converts format into its Uint equivalent.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns Uint format equivalent of input format. Undefined if none exist.
+extern ChNumFormat PAL_STDCALL ConvertToUint(ChNumFormat format);
+
+/// Converts format into its Sint equivalent.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns Sint format equivalent of input format. Undefined if none exist.
+extern ChNumFormat PAL_STDCALL ConvertToSint(ChNumFormat format);
+
+/// Converts format into its Float equivalent.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns Float format equivalent of input format. Undefined if none exist.
+extern ChNumFormat PAL_STDCALL ConvertToFloat(ChNumFormat format);
+
+/// Converts format into its Srgb equivalent.
+///
+/// @param [in] format Pixel format.
+///
+/// @returns Srgb format equivalent of input format. Undefined if none exist.
+extern ChNumFormat PAL_STDCALL ConvertToSrgb(ChNumFormat format);
+
+/// Converts the source format's numeric format to the numeric format of the provided destination format.
+///
+/// @param [in] srcFormat Source pixel format.
+/// @param [in] dstFormat Destination pixel format.
+///
+/// @returns Source format with equivalent numeric format of destination format. Undefined if none exist.
+extern ChNumFormat ConvertToDstNumFmt(ChNumFormat srcFormat, ChNumFormat dstFormat);
+
+/// Determines whether the srcFormat and the dstFormat have the same channel format.
+///
+/// @param [in] srcFormat Source pixel format.
+/// @param [in] dstFormat Destination pixel format.
+///
+/// @returns True if both formats share the same channel format. False otherwise.
+extern bool ShareChFmt(ChNumFormat srcFormat, ChNumFormat dstFormat);
+
+/// Compares the numeric formats of two pixel formats.
+///
+/// @param [in] srcFormat Source pixel format.
+/// @param [in] dstFormat Destination pixel format.
+///
+/// @returns True if both table entries report the same numeric support. False otherwise.
+inline bool HaveSameNumFmt(
+    ChNumFormat srcFormat,
+    ChNumFormat dstFormat)
+{
+    const FormatInfo& srcInfo = FormatInfoTable[static_cast<size_t>(srcFormat)];
+    const FormatInfo& dstInfo = FormatInfoTable[static_cast<size_t>(dstFormat)];
+
+    return (srcInfo.numericSupport == dstInfo.numericSupport);
+}
+
+/// Looks up the texel footprint of one compressed block for a BC, ETC2 or ASTC format.
+///
+/// @param [in] format Compressed format to query.
+///
+/// @returns Block width and height in texels with a depth of 1.  For any other format the function asserts and
+///          returns an all-zero extent.
+inline Extent3d CompressedBlockDim(
+    ChNumFormat format)
+{
+    Extent3d dim        = {};
+    bool     recognized = true;
+
+    // Each case group only records the 2D footprint; the depth shared by every recognized format is set afterwards.
+    switch (format)
+    {
+    case ChNumFormat::Bc1_Unorm:
+    case ChNumFormat::Bc1_Srgb:
+    case ChNumFormat::Bc2_Unorm:
+    case ChNumFormat::Bc2_Srgb:
+    case ChNumFormat::Bc3_Unorm:
+    case ChNumFormat::Bc3_Srgb:
+    case ChNumFormat::Bc4_Unorm:
+    case ChNumFormat::Bc4_Snorm:
+    case ChNumFormat::Bc5_Unorm:
+    case ChNumFormat::Bc5_Snorm:
+    case ChNumFormat::Bc6_Ufloat:
+    case ChNumFormat::Bc6_Sfloat:
+    case ChNumFormat::Bc7_Unorm:
+    case ChNumFormat::Bc7_Srgb:
+        dim.width  = CompressedBcBlockDim;
+        dim.height = CompressedBcBlockDim;
+        break;
+    case ChNumFormat::Etc2X8Y8Z8_Unorm:
+    case ChNumFormat::Etc2X8Y8Z8_Srgb:
+    case ChNumFormat::Etc2X8Y8Z8W1_Unorm:
+    case ChNumFormat::Etc2X8Y8Z8W1_Srgb:
+    case ChNumFormat::Etc2X8Y8Z8W8_Unorm:
+    case ChNumFormat::Etc2X8Y8Z8W8_Srgb:
+    case ChNumFormat::Etc2X11_Unorm:
+    case ChNumFormat::Etc2X11_Snorm:
+    case ChNumFormat::Etc2X11Y11_Unorm:
+    case ChNumFormat::Etc2X11Y11_Snorm:
+        dim.width  = CompressedEtcBlockDim;
+        dim.height = CompressedEtcBlockDim;
+        break;
+    case ChNumFormat::AstcLdr4x4_Unorm:
+    case ChNumFormat::AstcLdr4x4_Srgb:
+    case ChNumFormat::AstcHdr4x4_Float:
+        dim.width  = 4;
+        dim.height = 4;
+        break;
+    case ChNumFormat::AstcLdr5x4_Unorm:
+    case ChNumFormat::AstcLdr5x4_Srgb:
+    case ChNumFormat::AstcHdr5x4_Float:
+        dim.width  = 5;
+        dim.height = 4;
+        break;
+    case ChNumFormat::AstcLdr5x5_Unorm:
+    case ChNumFormat::AstcLdr5x5_Srgb:
+    case ChNumFormat::AstcHdr5x5_Float:
+        dim.width  = 5;
+        dim.height = 5;
+        break;
+    case ChNumFormat::AstcLdr6x5_Unorm:
+    case ChNumFormat::AstcLdr6x5_Srgb:
+    case ChNumFormat::AstcHdr6x5_Float:
+        dim.width  = 6;
+        dim.height = 5;
+        break;
+    case ChNumFormat::AstcLdr6x6_Unorm:
+    case ChNumFormat::AstcLdr6x6_Srgb:
+    case ChNumFormat::AstcHdr6x6_Float:
+        dim.width  = 6;
+        dim.height = 6;
+        break;
+    case ChNumFormat::AstcLdr8x5_Unorm:
+    case ChNumFormat::AstcLdr8x5_Srgb:
+    case ChNumFormat::AstcHdr8x5_Float:
+        dim.width  = 8;
+        dim.height = 5;
+        break;
+    case ChNumFormat::AstcLdr8x6_Unorm:
+    case ChNumFormat::AstcLdr8x6_Srgb:
+    case ChNumFormat::AstcHdr8x6_Float:
+        dim.width  = 8;
+        dim.height = 6;
+        break;
+    case ChNumFormat::AstcLdr8x8_Unorm:
+    case ChNumFormat::AstcLdr8x8_Srgb:
+    case ChNumFormat::AstcHdr8x8_Float:
+        dim.width  = 8;
+        dim.height = 8;
+        break;
+    case ChNumFormat::AstcLdr10x5_Unorm:
+    case ChNumFormat::AstcLdr10x5_Srgb:
+    case ChNumFormat::AstcHdr10x5_Float:
+        dim.width  = 10;
+        dim.height = 5;
+        break;
+    case ChNumFormat::AstcLdr10x6_Unorm:
+    case ChNumFormat::AstcLdr10x6_Srgb:
+    case ChNumFormat::AstcHdr10x6_Float:
+        dim.width  = 10;
+        dim.height = 6;
+        break;
+    case ChNumFormat::AstcLdr10x8_Unorm:
+    case ChNumFormat::AstcLdr10x8_Srgb:
+    case ChNumFormat::AstcHdr10x8_Float:
+        dim.width  = 10;
+        dim.height = 8;
+        break;
+    case ChNumFormat::AstcLdr10x10_Unorm:
+    case ChNumFormat::AstcLdr10x10_Srgb:
+    case ChNumFormat::AstcHdr10x10_Float:
+        dim.width  = 10;
+        dim.height = 10;
+        break;
+    case ChNumFormat::AstcLdr12x10_Unorm:
+    case ChNumFormat::AstcLdr12x10_Srgb:
+    case ChNumFormat::AstcHdr12x10_Float:
+        dim.width  = 12;
+        dim.height = 10;
+        break;
+    case ChNumFormat::AstcLdr12x12_Unorm:
+    case ChNumFormat::AstcLdr12x12_Srgb:
+    case ChNumFormat::AstcHdr12x12_Float:
+        dim.width  = 12;
+        dim.height = 12;
+        break;
+    default:
+        // Callers must only pass compressed formats; anything else leaves the extent zeroed.
+        PAL_ASSERT_ALWAYS();
+        recognized = false;
+        break;
+    }
+
+    if (recognized)
+    {
+        // Every supported compressed block is exactly one texel deep.
+        dim.depth = 1;
+    }
+
+    return dim;
+}
+
+/// Scales a size expressed in compressed blocks up to a size in texels.
+///
+/// @param [in]  format      Compressed format whose block dimensions are used.
+/// @param [in]  width       Width in blocks.
+/// @param [in]  height      Height in blocks.
+/// @param [in]  depth       Depth in blocks.
+///
+/// @returns Structure containing the texel width, height and depth
+inline Extent3d CompressedBlocksToTexels(
+    ChNumFormat format,
+    uint32      width,
+    uint32      height,
+    uint32      depth)
+{
+    const Extent3d blockDim = CompressedBlockDim(format);
+
+    // Start from the block dimensions and multiply each axis by the requested block count.
+    Extent3d texels = blockDim;
+    texels.width    = blockDim.width  * width;
+    texels.height   = blockDim.height * height;
+    texels.depth    = blockDim.depth  * depth;
+
+    return texels;
+}
+
+/// Converts a size expressed in texels to the number of compressed blocks needed to cover it.
+///
+/// @param [in] format      Compressed format whose block dimensions are used.
+/// @param [in] width       Width in texels.
+/// @param [in] height      Height in texels.
+/// @param [in] depth       Depth in texels.
+///
+/// @returns Structure containing the block width, height and depth
+inline Extent3d CompressedTexelsToBlocks(
+    ChNumFormat format,
+    uint32      width,
+    uint32      height,
+    uint32      depth)
+{
+    const Extent3d blockDim = CompressedBlockDim(format);
+
+    // Each axis is divided by the block dimension using Util::RoundUpQuotient.
+    Extent3d blocks = blockDim;
+    blocks.width    = Util::RoundUpQuotient(width,  blockDim.width);
+    blocks.height   = Util::RoundUpQuotient(height, blockDim.height);
+    blocks.depth    = Util::RoundUpQuotient(depth,  blockDim.depth);
+
+    return blocks;
+}
+
+/// Looks up the size, in bits, of one pixel or element of the given format.
+///
+/// @param format The format to query for.
+///
+/// @return The bitsPerPixel value recorded for the format in the format info table.
+inline uint32 BitsPerPixel(
+    ChNumFormat format)
+{
+    const FormatInfo& info = FormatInfoTable[static_cast<size_t>(format)];
+
+    return info.bitsPerPixel;
+}
+
+/// Queries the number of whole bytes in a pixel or element for the given format.
+///
+/// @param format The format to query for.
+///
+/// @return BitsPerPixel() divided by eight, rounded down.
+inline uint32 BytesPerPixel(
+    ChNumFormat format)
+{
+    const uint32 bits = BitsPerPixel(format);
+
+    // Unsigned division by eight; any remainder bits are dropped.
+    return (bits / 8);
+}
+
+/// Tests whether a channel swizzle may be used with a format.
+///
+/// The constant swizzles Zero and One are always accepted.  X, Y, Z and W are accepted only when the format's
+/// component mask contains the matching channel flag.
+///
+/// @param [in] format  The pixel format to check against.
+/// @param [in] swizzle The specified channel swizzle to check with.
+///
+/// @returns True if the specified channel swizzle is valid for the given format. False otherwise.
+inline bool IsValidChannelSwizzle(
+    ChNumFormat    format,
+    ChannelSwizzle swizzle)
+{
+    const uint32 componentMask = ComponentMask(format);
+
+    switch (swizzle)
+    {
+    case ChannelSwizzle::Zero:
+    case ChannelSwizzle::One:
+        return true;
+    case ChannelSwizzle::X:
+        return ((componentMask & ChannelFlags::X) != 0);
+    case ChannelSwizzle::Y:
+        return ((componentMask & ChannelFlags::Y) != 0);
+    case ChannelSwizzle::Z:
+        return ((componentMask & ChannelFlags::Z) != 0);
+    case ChannelSwizzle::W:
+        return ((componentMask & ChannelFlags::W) != 0);
+    default:
+        break;
+    }
+
+    // Only reached for a swizzle value that is none of the cases above.
+    PAL_NEVER_CALLED();
+    return false;
+}
+
+/// Queries the per-component bit counts for a particular format.
+///
+/// @param [in] format The format to query for.
+///
+/// @returns Pointer to the first of the format's per-component bit counts, as stored in the format info table.
+///          Returned as an array of four counts.
+inline const uint32* ComponentBitCounts(
+    ChNumFormat format)
+{
+    const FormatInfo& info = FormatInfoTable[static_cast<size_t>(format)];
+
+    return &info.bitCount[0];
+}
+
+/// Finds the widest component of a format.
+///
+/// @param [in] format The channel format to query for.
+///
+/// @returns The largest of the format's four per-component bit counts.
+inline uint32 MaxComponentBitCount(
+    ChNumFormat format)
+{
+    const FormatInfo& info = FormatInfoTable[static_cast<size_t>(format)];
+
+    // Running maximum over the four component slots.
+    uint32 maxBits = info.bitCount[0];
+    for (uint32 component = 1; component < 4; ++component)
+    {
+        maxBits = Util::Max(maxBits, info.bitCount[component]);
+    }
+
+    return maxBits;
+}
+
+/// Tests whether a format is a block-compressed format.
+///
+/// @param [in] format The format to test.
+///
+/// @returns True if the format's properties include the BlockCompressed bit. False otherwise.
+inline bool IsBlockCompressed(
+    ChNumFormat format)
+{
+    const FormatInfo& info = FormatInfoTable[static_cast<size_t>(format)];
+
+    return ((info.properties & BlockCompressed) != 0);
+}
+
+/// Tests whether a format is a macro-pixel-packed format.
+///
+/// @param [in] format The format to test.
+///
+/// @returns True if the format's properties include the MacroPixelPacked bit. False otherwise.
+inline bool IsMacroPixelPacked(
+    ChNumFormat format)
+{
+    const FormatInfo& info = FormatInfoTable[static_cast<size_t>(format)];
+
+    return ((info.properties & MacroPixelPacked) != 0);
+}
+
+/// Tests whether a format is macro-pixel-packed but not a YUV format.
+///
+/// @param [in] format The format to test.
+///
+/// @returns True if IsMacroPixelPacked() accepts the format and IsYuv() rejects it. False otherwise.
+inline bool IsMacroPixelPackedRgbOnly(
+    ChNumFormat format)
+{
+    // Formats that are not macro-pixel-packed are rejected before the YUV test runs.
+    if (IsMacroPixelPacked(format) == false)
+    {
+        return false;
+    }
+
+    return (IsYuv(format) == false);
+}
+
+/// Returns the base-2 logarithm of the subsampling ratio between the luma plane and the chroma plane(s) of a YUV
+/// planar format. Right-shifting the luma plane's dimensions by the returned amounts gives the dimensions of the
+/// chroma plane(s).
+///
+/// @param [in] format  Format.
+/// @param [in] plane   Image plane to query for.
+///
+/// @returns Per-axis shift amounts; all zero for plane 0 and for formats which are not YUV planar.
+inline Extent3d Log2SubsamplingRatio(
+    ChNumFormat format,
+    uint32      plane)
+{
+    // A zeroed extent means "sampled at full rate": { log2(1), log2(1), log2(1) } == { 0, 0, 0 }.
+    Extent3d shifts = { };
+
+    // Formats which are not YUV planar, and the luma plane (plane 0) of those which are, need no adjustment.
+    if ((IsYuvPlanar(format) == false) || (plane == 0))
+    {
+        return shifts;
+    }
+
+    PAL_ASSERT((plane == 1) || (plane == 2));
+
+    switch (format)
+    {
+    // 4:4:4 - chroma has as many samples as luma in every direction.
+    case ChNumFormat::P412:
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 924
+    case ChNumFormat::YUV_444P10:
+    case ChNumFormat::YUV_444P12:
+    case ChNumFormat::YUV_444P16:
+#endif
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 925
+    case ChNumFormat::YV24:
+    case ChNumFormat::NV24:
+    case ChNumFormat::P410:
+    case ChNumFormat::P416:
+#endif
+        break;
+    // 4:2:0 - chroma has half as many samples both horizontally and vertically.
+    case ChNumFormat::YV12:
+    case ChNumFormat::NV12:
+    case ChNumFormat::NV21:
+    case ChNumFormat::P010:
+    case ChNumFormat::P012:
+    case ChNumFormat::P016:
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 924
+    case ChNumFormat::YUV_420P10:
+    case ChNumFormat::YUV_420P12:
+    case ChNumFormat::YUV_420P16:
+#endif
+        // log2(1/2) = -1, stored as a right-shift amount of 1.
+        shifts.width  = 1;
+        shifts.height = 1;
+        break;
+    // 4:2:2 - chroma has half as many samples horizontally and the same number vertically.
+    case ChNumFormat::P208:
+    case ChNumFormat::P210:
+    case ChNumFormat::P212:
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 923
+    case ChNumFormat::P216:
+#endif
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 924
+    case ChNumFormat::YUV_422P10:
+    case ChNumFormat::YUV_422P12:
+    case ChNumFormat::YUV_422P16:
+#endif
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 925
+    case ChNumFormat::YV16:
+#endif
+        shifts.width = 1;
+        break;
+    // 4:1:1 - chroma has a quarter as many samples horizontally and the same number vertically.
+    case ChNumFormat::NV11:
+        // log2(1/4) = -2, stored as a right-shift amount of 2.
+        shifts.width = 2;
+        break;
+    default:
+        PAL_NEVER_CALLED(); // Did we miss a new YUV planar format?
+        break;
+    }
+
+    return shifts;
+}
+
+/// Converts a linearly-scaled color value to gamma-corrected sRGB.
+///
+/// @param [in] linear Linear color value.
+///
+/// @returns Gamma-corrected sRGB color value.
+extern float LinearToGamma(float linear);
+
+/// Converts a gamma-corrected sRGB color value to linear color space.
+///
+/// @param [in] gammaCorrectedVal Gamma-corrected sRGB color value.
+///
+/// @returns Linear color value.
+extern float GammaToLinear(float gammaCorrectedVal);
+
+/// Checks to see if a given format is an MM format.
+///
+/// @param [in] format Pixel format to check.
+///
+/// @returns True if the format is an MM format. False otherwise.
+extern bool IsMmFormat(ChNumFormat format);
+
+/// Checks to see if a given format is an MM12 format.
+///
+/// @param [in] format Pixel format to check.
+///
+/// @returns True if the format is an MM12 format. False otherwise.
+extern bool IsMm12Format(ChNumFormat format);
+
+/// Checks to see if a given format is an MM10 format.
+///
+/// @param [in] format Pixel format to check.
+///
+/// @returns True if the format is an MM10 format. False otherwise.
+extern bool IsMm10Format(ChNumFormat format);
+
+} // Formats
+} // Pal
@@ -0,0 +1,711 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palGpuMemory.h
+ * @brief Defines the Platform Abstraction Library (PAL) IGpuMemory interface and related types.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palDestroyable.h"
+
+#if defined(_WIN32)
+struct _SECURITY_ATTRIBUTES;
+#endif
+
+namespace Pal
+{
+
+// Forward declarations.
+class IGpuMemory;
+class IDevice;
+class IImage;
+enum class VaRange : uint32;
+
+/// Specifies the base level priority of a GPU memory allocation as a hint to the memory manager in the event it needs
+/// to select allocations to page out of their preferred heaps.
+enum class GpuMemPriority : uint32
+{
+    Unused    = 0x0,  ///< Indicates that the allocation is not currently being used at all, and should be the first
+                      ///  choice to be paged out.
+    VeryLow   = 0x1,  ///< Lowest priority to keep in its preferred heap.
+    Low       = 0x2,  ///< Low priority to keep in its preferred heap.
+    Normal    = 0x3,  ///< Normal priority to keep in its preferred heap.
+    High      = 0x4,  ///< High priority to keep in its preferred heap (e.g., render targets).
+    VeryHigh  = 0x5,  ///< Highest priority to keep in its preferred heap.  Last choice to be paged out (e.g., page
+                      ///  tables, displayable allocations).
+    Count             ///< Number of priority levels defined above.
+};
+
+/// Specifies a finer granularity within the base level priority of a GPU memory allocation as a hint to the memory
+/// manager in the event it needs to select allocations to page out of their preferred heaps.
+enum class GpuMemPriorityOffset : uint32
+{
+    Offset0  = 0x0, ///< Same priority as Base Level.
+    Offset1  = 0x1, ///< Next priority from Base Level.
+    Offset2  = 0x2, ///< Next priority from Base Level.
+    Offset3  = 0x3, ///< Next priority from Base Level.
+    Offset4  = 0x4, ///< Next priority from Base Level.
+    Offset5  = 0x5, ///< Next priority from Base Level.
+    Offset6  = 0x6, ///< Next priority from Base Level.
+    Offset7  = 0x7, ///< Highest priority from Base Level.
+    Count           ///< Number of priority offsets defined above.
+};
+
+/// Specifies the access mode for unmapped pages in a virtual GPU memory allocation.
+enum class VirtualGpuMemAccessMode : uint32
+{
+    Undefined = 0x0, ///< Used in situations where no special access mode is needed.
+    NoAccess  = 0x1, ///< All accesses of unmapped pages will trigger a GPU page fault.
+    ReadZero  = 0x2, ///< Reads of unmapped pages return zero, and writes are discarded.
+    Count            ///< Number of access modes defined above.
+};
+
+/// Controls whether a GPU memory allocation is put through the MALL.
+enum class GpuMemMallPolicy : uint32
+{
+    Default = 0x0,  ///< MALL policy is decided by the driver.
+    Never   = 0x1,  ///< This allocation is never put through the MALL.
+    Always  = 0x2,  ///< This allocation is always put through the MALL.
+};
+
+/// Bitmask of cases where RPM view memory accesses will bypass the MALL.  The non-zero values are distinct bits.
+enum RpmViewsBypassMall : uint32
+{
+    RpmViewsBypassMallOff         = 0x0, ///< Disable MALL bypass.
+    RpmViewsBypassMallOnRead      = 0x1, ///< Skip MALL for read access of views created in RPM.
+    RpmViewsBypassMallOnWrite     = 0x2, ///< Skip MALL for write access of views created in RPM.
+    RpmViewsBypassMallOnCbDbWrite = 0x4, ///< Control the RPM CB/DB behavior.
+
+};
+
+/// Specifies a subregion of an allocation, in units of 4k pages, that has a different MALL policy from the rest of
+/// the allocation.
+struct GpuMemMallRange
+{
+    uint32  startPage; ///< Starting 4k page that will obey the specified mallPolicy.
+    uint32  numPages;  ///< Number of 4k pages that will obey the specified mallPolicy.
+};
+
+/// Specifies flags for @ref IGpuMemory creation.  The bitfields below total 64 bits and alias u64All.
+union GpuMemoryCreateFlags
+{
+    struct
+    {
+        uint64 virtualAlloc                 :  1; ///< Create a _virtual_ as opposed to _real_ GPU memory allocation.
+                                                  ///  Only VA space will be allocated, and pages must be mapped via
+                                                  ///  IQueue::RemapVirtualMemoryPages().
+        uint64 shareable                    :  1; ///< Memory can be shared between devices in the same process that
+                                                  ///  report the sharedMemory flag from
+                                                  ///  IDevice::GetMultiGpuCompatibility().
+        uint64 interprocess                 :  1; ///< Memory will be visible to other processes
+                                                  ///  (they may choose to open it).
+        uint64 presentable                  :  1; ///< Memory can be bound to an image that will be used by presents.
+        uint64 flippable                    :  1; ///< Memory can be bound to an image that will be used by flip
+                                                  ///  presents.
+        uint64 stereo                       :  1; ///< Memory will be used for stereo (DXGI or AQBS stereo).
+        uint64 globallyCoherent             :  1; ///< Memory needs to be globally coherent,
+                                                  ///  indicating the driver must manage both
+                                                  ///  CPU caches and GPU caches that are not flushed on
+                                                  ///  command buffer boundaries.
+        uint64 xdmaBuffer                   :  1; ///< GPU memory will be used for an XDMA cache buffer for
+                                                  ///  transferring data
+                                                  ///  between GPUs in a multi-GPU configuration.
+        uint64 turboSyncSurface             :  1; ///< The memory will be used for TurboSync private swapchain primary.
+        uint64 typedBuffer                  :  1; ///< GPU memory will be permanently considered a single
+                                                  ///  typed buffer pseudo-object
+                                                  ///  with the properties given in typedBufferInfo.
+        uint64 globalGpuVa                  :  1; ///< The GPU virtual address must be visible to all devices.
+        uint64 useReservedGpuVa             :  1; ///< Use GPU virtual address previously reserved by another
+                                                  ///  memory object. It is invalid when using the shadow descriptor
+                                                  ///  table VA range.
+        uint64 autoPriority                 :  1; ///< Allow the platform to automatically determine the priority of
+                                                  ///  this GPU memory allocation. Flag is only valid if the device
+                                                  ///  reports that it supports this feature, and will result in an
+                                                  ///  error otherwise.
+        uint64 busAddressable               :  1; ///< Create Bus Addressable memory. Allow memory to be used by other
+                                                  ///  device on the PCIe bus by exposing a write-only bus address.
+        uint64 sdiExternal                  :  1; ///< Create External Physical memory from an already allocated memory
+                                                  ///  on remote device. Similar to virtual allocations (no physical
+                                                  ///  backing) but have an immutable page mapping. The client must
+                                                  ///  specify surfaceBusAddr and markerBusAddr either at creation time
+                                                  ///  in GpuMemoryCreateInfo or by calling SetSdiRemoteBusAddress
+                                                  ///  once before using the GPU memory. The page mappings for an
+                                                  ///  allocation with this flag set must be initialized by including a
+                                                  ///  reference to it in the ppExternPhysMem list for the first
+                                                  ///  submission that references it.
+        uint64 sharedViaNtHandle            :  1; ///< Memory will be shared by using an NT handle.
+        uint64 peerWritable                 :  1; ///< The memory can be opened as peer memory and be writable.
+        uint64 tmzProtected                 :  1; ///< The memory is protected using TMZ (Trusted Memory Zone) or HSFB
+                                                  ///  (Hybrid Secure Framebuffer). It is not CPU accessible,
+                                                  ///  and GPU access is restricted by the hardware such that data
+                                                  ///  cannot be copied from protected memory into unprotected memory.
+        uint64 placeholder0                 :  1; ///< Placeholder.
+        uint64 externalOpened               :  1; ///< Specifies the GPUMemory is opened.
+        uint64 restrictedContent            :  1; ///< Specifies the GPUMemory is protected content.
+        uint64 restrictedAccess             :  1; ///< Specifies the GPUMemory is restricted shared access resource.
+        uint64 crossAdapter                 :  1; ///< Specifies the GPUMemory is shared cross-adapter resource.
+        uint64 cpuInvisible                 :  1; ///< By default, PAL makes every allocation CPU visible if all of its
+                                                  ///  preferred
+                                                  ///  heaps are CPU visible. This flag can be used to override this
+                                                  ///  behavior when the client knows the memory will never be mapped
+                                                  ///  for CPU access. If this flag is set, calls to IGpuMemory::Map()
+                                                  ///  on this object will fail.
+        uint64 gl2Uncached                  :  1; ///< Specifies the GPU Memory is un-cached on GPU L2 cache.
+                                                  ///  But the memory still would be cached by other cache hierarchy
+                                                  ///  like L0, RB caches, L1, and L3.
+        uint64 mallRangeActive              :  1; ///< If set, then this allocation will be partially allocated in the
+                                                  ///  MALL. If this is set, then the mallPolicy enumeration must be set
+                                                  ///  to either "always" or "never".
+        uint64 explicitSync                 :  1; ///< If set, shared memory will skip syncs in the kernel and all
+                                                  ///  drivers that use this memory must handle syncs explicitly.
+        uint64 privPrimary                  :  1; ///< This is a private primary surface gpu memory.
+        uint64 privateScreen                :  1; ///< GPU memory will be used for a private screen image.
+        uint64 kmdShareUmdSysMem            :  1; ///< UMD will allocate/free a memory buffer to be shared with KMD.
+        uint64 deferCpuVaReservation        :  1; ///< KMD will allocate with the "CpuVisibleOnDemand" alloc flag.
+                                                  ///  Ignored for non-CPU-visible allocations.
+        uint64 placeholder1                 :  1; ///< Placeholder.
+        uint64 startVaHintFlag              :  1; ///< startVaHintFlag is set to 1 for passing startVaHint address
+                                                  ///  to set baseVirtAddr as startVaHint for memory allocation.
+#if PAL_AMDGPU_BUILD
+        uint64 initializeToZero             :  1; ///< If set, PAL will request that the host OS zero-initializes
+                                                  ///  the allocation upon creation, currently, only GpuHeapLocal and
+                                                  ///  GpuHeapInvisible are supported.
+        uint64 discardable                  :  1; ///< If set, this gpu memory object can be discarded under memory
+                                                  ///  pressure without keeping the content.
+#else
+        uint64 placeholder2                 :  2; ///< Placeholder occupying the two bits used when PAL_AMDGPU_BUILD is set.
+#endif
+        uint64 directCaptureSource          :  1; ///< Memory will be mapped to DirectCapture resource's KMD-managed
+                                                  ///  private VA.
+        uint64 reserved                     : 28; ///< Reserved for future use.
+    };
+    uint64     u64All;                            ///< Flags packed as 64-bit uint.
+};
+
+/// Describes a typed buffer pseudo-object. When this is supplied in GpuMemoryCreateInfo together with the typedBuffer
+/// flag, the GPU memory object is permanently treated as a single typed buffer.  A typed buffer is very similar to a
+/// linear 3D image: it has a format, an extent, and row/depth pitch values.
+///
+/// Note that the typed buffer concept is used in other parts of the PAL interface and some of those instances may not
+/// require a permanent typed buffer association.  In such cases multiple typed buffers can be "bound" to one GPU memory
+/// object at arbitrary offsets without any need to set the typedBuffer flag or fill out a TypedBufferCreateInfo.
+struct TypedBufferCreateInfo
+{
+    SwizzledFormat swizzledFormat; ///< Pixel format and channel swizzle.
+    Extent3d       extent;         ///< Dimensions in pixels (width x height x depth).
+    uint32         rowPitch;       ///< Offset in bytes between the same X position on two consecutive rows.
+    uint32         depthPitch;     ///< Offset in bytes between the same X,Y position on two consecutive slices.
+    bool           depthIsSubres;  ///< True if the depth slices should be treated as an array of 2D subresources.
+};
+
+/// Specifies properties for @ref IGpuMemory creation.  Input structure to IDevice::CreateGpuMemory().
+///
+/// See @ref IGpuMemory for additional restrictions on the size, alignment, vaRange, and descrVirtAddr fields.
+struct GpuMemoryCreateInfo
+{
+    GpuMemoryCreateFlags flags;     ///< GPU memory flags.
+    gpusize              size;      ///< Amount of GPU memory to allocate in bytes.
+    gpusize              alignment; ///< Byte alignment of the allocation's GPU VA. If zero, an alignment matching the
+                                    ///  allocation granularity will be used.
+    VaRange              vaRange;   ///< Virtual address range for the GPU memory allocation.
+
+    union
+    {
+        const IGpuMemory* pReservedGpuVaOwner; ///< Must be null unless "useReservedGpuVa" is true.  It points to the
+                                               ///  memory object which previously reserved the GPU VA range to be used
+                                               ///  by the new memory object.
+        gpusize           descrVirtAddr;       ///< Must be zero unless vaRange is ShadowDescriptorTable, in which case
+                                               ///  it must specify the GPU VA of the corresponding DescriptorTable.
+                                               ///  It doesn't need to be the base VA of the DescriptorTable allocation
+                                               ///  but must be aligned to "alignment".
+        gpusize           replayVirtAddr;      ///< Must be zero unless vaRange is CaptureReplay, in which case it must
+                                               ///  specify the GPU VA of the corresponding memory object.
+        gpusize           startVaHint;         ///< Client-provided start VA hint for baseVirtAddr. If the hint is not
+                                               ///  properly aligned, the next higher aligned address is used as the
+                                               ///  hint. If the hint is available and within the right vaRange (where
+                                               ///  vaRange is VaRange::Default), baseVirtAddr is set to the hint. If
+                                               ///  the hint is unavailable, the next higher available address between
+                                               ///  startVaHint and the max of vaRange is used. If either case fails,
+                                               ///  baseVirtAddr is set as normal. See flags.startVaHintFlag.
+    };
+
+    GpuMemPriority       priority;       ///< Hint to the OS paging process on how important it is to keep this
+                                         ///  allocation in its preferred heap.
+    GpuMemPriorityOffset priorityOffset; ///< Offset from the base level priority. A higher offset means higher priority
+                                         ///  within the same base level. Currently supported on Windows only.
+    GpuMemMallPolicy     mallPolicy;     ///< Used to control whether or not this allocation will be accessed via the
+                                         ///  MALL (memory access last level).  Only valid if "supportsMall" is set in
+                                         ///  DeviceProperties.
+    GpuMemMallRange      mallRange;      ///< These parameters are only meaningful if flags.mallRangeActive is set.
+                                         ///  Any pages outside of this range will use the opposite MALL policy from
+                                         ///  what is specified in "mallPolicy".
+
+    /// Describes how the allocation will be accessed. If heapAccess is set to something other than @ref
+    /// GpuHeapAccessExplicit, then PAL decides the appropriate heap to allocate memory from based on this member and
+    /// @ref heaps is ignored. Otherwise heap selection respects the selection in @ref heaps.
+    GpuHeapAccess heapAccess;
+    uint32        heapCount;           ///< Number of entries in heaps[]. Must be 0 for virtual allocations.
+    GpuHeap       heaps[GpuHeapCount]; ///< List of allowed memory heaps, in order of preference. It will be ignored if
+                                       ///  @ref heapAccess is set to something other than @ref GpuHeapAccessExplicit.
+
+    /// The pointer to an Image object the memory object will be bound to.  It must be specified in special cases where
+    /// a memory object is permanently linked to an Image such as presentable images or shared resources on Windows.
+    /// For other cases, it's highly encouraged to provide the image object pointer.
+    ///
+    /// When @ref compression is set to Default, clients can benefit from PAL's internal logic to choose the optimal
+    /// GFX12-style distributed compression setting (enabled or disabled) based on properties of this image (e.g., usage
+    /// flags); otherwise PAL will disable the compression by default when detecting a null @ref pImage.
+    IImage* pImage;
+
+    TypedBufferCreateInfo typedBufferInfo; ///< This struct must be filled out if the typedBuffer flag is set.
+                                           ///  This GPU memory will be permanently considered a typed buffer.
+
+    VirtualGpuMemAccessMode virtualAccessMode; ///< Access mode for virtual GPU memory's unmapped pages, WDDM only.
+    gpusize                 surfaceBusAddr;    ///< Surface bus address of Bus Addressable Memory.
+                                               ///  Only valid when GpuMemoryCreateFlags::sdiExternal is set.
+    gpusize                 markerBusAddr;     ///< Marker bus address of Bus Addressable Memory. The client can:
+                                               ///  1. Write to marker
+                                               ///  2. Let GPU wait until a value is written to marker before issuing
+                                               ///     the next command.
+                                               ///  Only valid when GpuMemoryCreateFlags::sdiExternal is set.
+
+    /// Client override for GFX12-style distributed compression. Only meaningful on devices that set the
+    /// supportDistributedCompression DeviceProperties flag. By default, PAL will apply a heuristic to determine
+    /// whether or not to enable compression based on properties of the memory allocation (e.g., CPU-visibility or
+    /// properties of the attached pImage). This mode does not apply to virtual IGpuMemory objects (such objects
+    /// inherit their compression behavior from their backing physical memory on a page-by-page basis).
+    ///
+    /// @note If allocating memory intended to back multiple resources (e.g., a heap for DX12 "placed resources") where
+    /// pImage is nullptr, the client should set compression to Enable then rely on per-resource or per-view controls to
+    /// disable compression for resources as needed (use compressionMode fields in ImageCreateInfo, BufferViewInfo,
+    /// ImageViewInfo, etc.).
+    ///
+    /// If a client intends to enable distributed compression on buffers, the buffer base alignment and size should be
+    /// aligned to the DCC minimum compression unit size (256 bytes); otherwise there is a risk of corruption.
+    /// E.g., consider an allocation holding two buffers (placed resources or driver suballocations). Suppose the first
+    /// buffer is 4 KiB + 128 bytes with compression enabled and the second buffer is 128 bytes with compression
+    /// disabled (buffer view compressionMode is ReadBypassWriteDisable). When compressing the trailing 128 bytes of
+    /// the first buffer, the second buffer will be compressed as well. Reading the second buffer will then return
+    /// corrupted content.
+    TriState compression;
+};
+
+/// Specifies properties for @ref IGpuMemory creation.  Input structure to IDevice::CreatePinnedGpuMemory().
+///
+/// See @ref IGpuMemory for additional restrictions on the size and vaRange fields.
+struct PinnedGpuMemoryCreateInfo
+{
+    const void*       pSysMem;   ///< Pointer to the system memory that should be pinned for GPU access.  Must be
+                                 ///  aligned to realMemAllocGranularity in DeviceProperties.
+    size_t            size;      ///< Amount of system memory to pin for GPU access.
+    VaRange           vaRange;   ///< Virtual address range for the GPU memory allocation.
+    gpusize           alignment; ///< Byte alignment of the allocation's GPU VA.  If zero, an alignment matching the
+                                 ///  Platform's allocation granularity will be used.
+    GpuMemMallPolicy  mallPolicy; ///< Used to control whether or not this allocation will be
+                                  ///  accessed via the MALL (memory access last level).  Only valid
+                                  ///  if "supportsMall" is set in DeviceProperties.
+    GpuMemMallRange   mallRange;  ///< Only meaningful if mallRangeActive is set (NOTE(review): this struct has no
+                                  ///  flags member; confirm which flag applies).  Pages outside this range use the
+                                  ///  opposite MALL policy from what is specified in "mallPolicy".
+};
+
+/// Specifies properties for @ref IGpuMemory creation.  Input structure to IDevice::CreateSvmGpuMemory().
+///
+/// See @ref IGpuMemory for additional restrictions on the size and alignment fields.
+struct SvmGpuMemoryCreateInfo
+{
+    GpuMemoryCreateFlags    flags;                 ///< GPU memory flags.
+    gpusize                 size;                  ///< Amount of SVM memory to allocate in bytes.
+                                                   ///  The total amount of SVM memory can't exceed the value set in
+                                                   ///  maxSvmSize when the platform is created.
+    gpusize                 alignment;             ///< Byte alignment of the allocation's SVM VA.  If zero, an
+                                                   ///  alignment matching the allocation granularity will be used.
+    const IGpuMemory*        pReservedGpuVaOwner;  ///< Must be null unless "useReservedGpuVa" is true.  It points to
+                                                   ///  the memory object which previously reserved the GPU VA range
+                                                   ///  to be used by the new memory object.
+    bool                    isUsedForKernel;       ///< Memory will be used to store a kernel and execute it on the GPU.
+    GpuMemMallPolicy         mallPolicy;           ///< Used to control whether or not this allocation will be
+                                                   ///  accessed via the MALL (memory access last level).  Only valid
+                                                   ///  if "supportsMall" is set in DeviceProperties.
+    GpuMemMallRange          mallRange;           ///< These parameters are only meaningful if flags.mallRangeActive
+                                                  ///  is set.  Any pages outside of this range will use the opposite
+                                                  ///  MALL policy from what is specified in "mallPolicy".
+};
+
+/// Specifies parameters for opening a shared GPU memory object on another device (see IDevice::OpenSharedGpuMemory).
+struct GpuMemoryOpenInfo
+{
+    IGpuMemory* pSharedMem; ///< Shared GPU memory object from another device to open.
+};
+
+/// Parameters for opening another device's GPU memory for peer-to-peer transfers (see IDevice::OpenPeerGpuMemory).
+struct PeerGpuMemoryOpenInfo
+{
+    IGpuMemory* pOriginalMem; ///< GPU memory object from another device to open for peer-to-peer memory transfers.
+};
+
+/// Specifies parameters for opening another non-PAL device's GPU memory for access from this device.  Input structure
+/// to IDevice::OpenExternalSharedGpuMemory().
+struct ExternalGpuMemoryOpenInfo
+{
+    ExternalResourceOpenInfo resourceInfo;    ///< Information describing the external GPU memory.
+    TypedBufferCreateInfo    typedBufferInfo; ///< Typed buffer properties; see the flags.typedBuffer flag below.
+    GpuMemMallPolicy         mallPolicy;      ///< Used to control whether or not this allocation will be accessed via
+                                              ///  the MALL (memory access last level). Only valid if "supportsMall" is
+                                              ///  set in DeviceProperties.
+    GpuMemMallRange          mallRange;       ///< These parameters are only meaningful if flags.mallRangeActive is set.
+                                              ///  Any pages outside of this range will use the opposite MALL policy
+                                              ///  from what is specified in "mallPolicy".
+    union
+    {
+        struct
+        {
+            uint32 typedBuffer     :  1;  ///< GPU memory will be permanently considered a single typed buffer
+                                          ///  pseudo-object with the properties given in typedBufferInfo.
+            uint32 gl2Uncached     :  1;  ///< Specifies the GPU memory is un-cached on the GPU L2 cache.
+            uint32 mallRangeActive :  1;  ///< If set, then this allocation will be partially allocated in the MALL.
+                                          ///  If this is set, then the mallPolicy enumeration must be set to either
+                                          ///  "always" or "never".
+            uint32 reserved        : 29;  ///< Reserved for future use.
+        };
+        uint32 u32All;              ///< Flags packed as 32-bit uint.
+    } flags;                        ///< External GPU memory open info flags.
+};
+
+/// The fundamental information that describes a GPU memory object that is stored directly in each IGpuMemory.
+/// It can be accessed without a virtual call via IGpuMemory::Desc().
+struct GpuMemoryDesc
+{
+    gpusize gpuVirtAddr;         ///< GPU virtual address of the GPU memory allocation.
+    gpusize size;                ///< Size of the GPU memory allocation, in bytes.
+    gpusize clientSize;          ///< Size of the client requested GPU memory allocation, in bytes.
+    gpusize alignment;           ///< Required GPU virtual address alignment, in bytes.
+    uint32  heapCount;           ///< Number of entries in heaps[].  Must be 0 for virtual allocations.
+    GpuHeap heaps[GpuHeapCount]; ///< List of preferred memory heaps, in order of preference.
+    gpusize surfaceBusAddr;      ///< Bus address of the SDI memory surface.  See markerBusAddr for when it is valid.
+    gpusize markerBusAddr;       ///< Bus address of the SDI marker.  Neither address is initialized until the memory is
+                                 ///  made resident; call InitBusAddressableGpuMemory() to query and update them first.
+    union
+    {
+        struct
+        {
+            uint32 isVirtual    :  1; ///< GPU memory is not backed by physical memory and must be remapped before the
+                                      ///  GPU can safely access it. Will also be set for sdiExternal allocations. See
+                                      ///  GpuMemoryCreateFlags::sdiExternal.
+            uint32 isPeer       :  1; ///< GPU memory object was created with @ref IDevice::OpenPeerGpuMemory.
+            uint32 isShared     :  1; ///< GPU memory object was created either with
+                                      ///  @ref IDevice::OpenExternalSharedGpuMemory or OpenSharedGpuMemory.
+                                      ///  This IGpuMemory references memory created either by another process or
+                                      ///  another device with the exception of peer access.
+            uint32 isExternal   :  1; ///< GPU memory object was created with @ref IDevice::OpenExternalSharedGpuMemory.
+                                      ///  This IGpuMemory references memory that was created either by another process
+                                      ///  or by a device that doesn't support sharedMemory with this object's device
+                                      ///  (i.e., MDA sharing on Windows).
+            uint32 isSvmAlloc   :  1; ///< GPU memory is allocated in system memory.
+                                      ///  Valid only when IOMMUv2 is supported.
+            uint32 isExecutable :  1; ///< GPU memory is used for execution. Valid only when IOMMUv2 is supported.
+            uint32 isExternPhys :  1; ///< GPU memory is external physical memory.
+
+            uint32 placeholder0        :  1; ///< Reserved for a future memory flag.
+
+            uint32 isCompressed :  1; ///< Set for physical allocations where UMD requested PTE.D=1 to enable
+                                      ///  GFX12-style distributed compression.
+            uint32 reserved     : 23; ///< Reserved for future use.
+        };
+        uint32 u32All;               ///< Flags packed as 32-bit uint.
+    } flags;                         ///< GPU memory desc flags.
+
+    uint64 uniqueId; ///< Unique ID given to each GPU memory object, allows client tracking of GPU memory allocations.
+};
+
+/// Defines GPU memory sub-allocation info: the start address of the whole memory, plus the offset and size that show
+/// where the sub-allocated memory lies within it.
+struct GpuMemSubAllocInfo
+{
+    gpusize     address;     ///< Start address of the memory, not including the offset.
+    gpusize     offset;      ///< Offset from the start address of the memory.
+    gpusize     size;        ///< Size of the sub-allocated memory.
+};
+
+/// Pairs a GPU memory object with flags giving more specific usage details.  An array of these structures is passed
+/// to PAL residency operations.
+///
+/// @see IDevice::AddGpuMemoryReferences
+/// @see IQueue::Submit
+struct GpuMemoryRef
+{
+    union
+    {
+        struct
+        {
+            uint32 readOnly :  1;  ///< The allocation will not be written using this reference.
+            uint32 reserved : 31;  ///< Reserved for future use.
+        };
+        uint32 u32All;             ///< Flags packed as 32-bit uint.
+    } flags;                       ///< GPU memory reference flags.
+
+    IGpuMemory* pGpuMemory;        ///< The GPU memory object referenced by this residency operation.
+};
+
+/// Specifies a Display Output Post-Processing (DOPP) allocation that will be referenced by a submission along with
+/// additional info describing how it will be used.
+///
+/// @see IQueue::Submit
+struct DoppRef
+{
+    union
+    {
+        struct
+        {
+            uint32 pfpa        :  1;  ///< Access to this DOPP allocation will be redirected to the primary pending
+                                      ///  present (i.e., pre-flip primary access).  If not set, access will
+                                      ///  refer to the current onscreen primary.
+            uint32 lastPfpaCmd :  1;  ///< This submission will be the last access of this pfpa allocation
+                                      ///  for this frame.  The pfpa interval will end once this submit
+                                      ///  completes, allowing the corresponding vidPnSource to flip.
+                                      ///  This flag is invalid if the pfpa flag is not set.
+            uint32 reserved    : 30;  ///< Reserved for future use.
+        };
+        uint32 u32All;             ///< Flags packed as 32-bit uint.
+    } flags;                       ///< DOPP reference flags.
+
+    IGpuMemory* pGpuMemory;        ///< The DOPP GPU memory allocation referenced by the submission.
+};
+
+/// Specifies the type of handle to use when exporting GPU memory.  See GpuMemoryExportInfo::exportType.
+enum class ExportHandleType : uint32
+{
+    Default        = 0,            ///< Let PAL choose the export type.
+#if PAL_AMDGPU_BUILD
+    FileDescriptor,                ///< Export using a Linux file descriptor.
+    Kms,                           ///< Export through KMS.
+#endif
+};
+
+/// Specifies parameters for exporting a GPU memory object as an OS handle via IGpuMemory::ExportExternalHandle().
+struct GpuMemoryExportInfo
+{
+#if PAL_KMT_BUILD
+    const _SECURITY_ATTRIBUTES* pSecurityAttributes; ///< Specifies the security descriptor and the inheritable
+                                                     ///  attribute.
+    const wchar_t*              pNtObjectName;       ///< Name for the NT handle.  If the object is exported as a
+                                                     ///  named NT handle, the handle can then be acquired via
+                                                     ///  this name.
+    uint32                      accessFlags;         ///< Desired access rights of GPU memory.
+#endif
+    ExportHandleType            exportType;         ///< Type of handle to use for exporting the memory.
+};
+
+/**
+ ***********************************************************************************************************************
+ * @interface IGpuMemory
+ * @brief     Interface representing a GPU-accessible memory allocation.
+ *
+ * Depending on creation parameters, this could correspond to:
+ *
+ * + A _real_ memory object, corresponding directly to a physical allocation made on this device (whether it resides in
+ *   a local or non-local heap).
+ * + A _virtual_ memory object, only consisting of virtual address space that can be mapped on a page basis to pages in
+ *   _real_ memory objects via IQueue::RemapVirtualMemoryPages.
+ * + Pinned memory, a _real_ memory object created by pinning down client system memory.
+ * + Peer memory, a _real_ memory object corresponding to GPU memory that is likely local to another GPU.  Only copy
+ *   operations (peer-to-peer transfers) are allowed with this memory.
+ * + Opened/shared memory, a _real_ memory object that is fully shared between multiple GPUs, residing in a non-local
+ *   heap.
+ * + External shared memory, a _real_ memory object that was created by an external process and is fully shared between
+ *   multiple GPUs.
+ *
+ * @see IDevice::CreateGpuMemory
+ * @see IDevice::CreatePinnedGpuMemory
+ * @see IDevice::OpenSharedGpuMemory
+ * @see IDevice::OpenPeerGpuMemory
+ * @see IDevice::OpenExternalSharedGpuMemory
+ *
+ *
+ * All of these kinds of GPU memory are assigned a set of fundamental properties specified in GpuMemoryDesc which are
+ * either specified by the client or by PAL.  There are specific rules these properties must follow; those rules are
+ * documented here to avoid duplication.  Violating these rules will cause the device's corresponding "get size"
+ * functions to return an error code; the create/open functions may not validate their arguments.
+ *
+ *
+ * With the exception of external memory objects being opened, PAL will adjust size and base alignments as necessary
+ * to meet device requirements. Typically this means going out to OS page boundaries. The client is no longer required
+ * to query device requirements and align for PAL.
+ *
+ * Note that the device alignment requirements apply equally to GPU VAs.  However, other kinds of alignment
+ * restrictions (e.g., IGpuMemoryBindable's requirements) may only apply to one of those two properties.  When creating
+ * GPU memory objects the client must be careful to set the "alignment" field to the alignment of the GPU VA.
+ *
+ *
+ * The client can't directly specify a memory object's GPU VA but must specify its VA range, limiting which
+ * portions of the VA space can be used.  Note that non-external shared and peer GPU memory objects will use the
+ * original memory's VA range.  External shared GPU memory always uses the default VA range.
+ *
+ * The ShadowDescriptorTable VA range is special because it pairs the shadow GPU memory to an existing descriptor GPU
+ * memory.  The client must specify the GPU VA of the corresponding DescriptorTable memory when creating a shadow GPU
+ * memory object via descrVirtAddr; it must satisfy the alignment requirements of the shadow GPU memory.  Both GPU
+ * memory objects must be created on the same device.  Note that descrVirtAddr can be offset into the descriptor
+ * allocation such that multiple shadow GPU memory objects correspond to one larger descriptor GPU memory object.
+ *
+ *
+ * The client can further influence the GPU VA of shared and peer GPU memory objects. If the globalGpuVa flag is set
+ * when the original GPU memory object is created, PAL will assign any shared or peer GPU memory objects that same VA.
+ * Note that globalGpuVa is only supported if globalGpuVaSupport is set in DeviceProperties.
+ ***********************************************************************************************************************
+ */
+class IGpuMemory : public IDestroyable
+{
+public:
+    /// Sets a new priority for this GPU memory object.
+    ///
+    /// This call is not available for virtual or pinned memory.
+    ///
+    /// @param [in] priority       New base priority for the GPU memory object.
+    ///
+    /// @param [in] priorityOffset New priority offset for the GPU memory object. This is a small bias that can be
+    ///                            used by the OS to raise the importance of an allocation when there are
+    ///                            multiple allocations in the same base priority level. You can think of it as
+    ///                            the fractional bits of the priority level.
+    ///
+    /// @returns Success if the priority was successfully updated.  Otherwise, one of the following errors may be
+    ///          returned:
+    ///          + ErrorUnavailable if this is a virtual or pinned GPU memory object.
+    virtual Result SetPriority(
+        GpuMemPriority       priority,
+        GpuMemPriorityOffset priorityOffset) = 0;
+
+    /// Makes the GPU memory available for CPU access and gives the client a pointer to reference it.
+    ///
+    /// The allocation should be unmapped by the client once CPU access is complete, although it _is_ legal to keep an
+    /// allocation mapped while the GPU references the allocation from a command buffer.
+    ///
+    /// It is legal to map the allocation multiple times concurrently.  Mapping is not available for pinned or virtual
+    /// memory objects.  This call is thread safe for calls referencing this memory object.
+    ///
+    /// @see Unmap.
+    ///
+    /// @param [out] ppData CPU pointer to the GPU memory object.
+    ///
+    /// @returns Success if the map succeeded.  Otherwise, *ppData will not be valid and one of the following errors may
+    ///          be returned:
+    ///          + ErrorInvalidPointer if ppData is null.
+    ///          + ErrorGpuMemoryMapFailed if the object is busy and cannot be mapped by the OS.
+    ///          + ErrorNotMappable if the memory object cannot be mapped due to some of its heaps not having the CPU
+    ///            visible flag set.
+    ///          + ErrorUnavailable if the memory object is not a real allocation.
+    virtual Result Map(
+        void** ppData) = 0;
+
+    /// Removes CPU access from a previously mapped GPU memory object.
+    ///
+    /// This call is thread safe for calls referencing the same memory object.
+    ///
+    /// @see Map
+    ///
+    /// @returns Success if the unmap succeeded.  Otherwise, one of the following errors may be returned:
+    ///          + ErrorGpuMemoryUnmapFailed if the GPU memory object cannot be unlocked.
+    ///          + ErrorUnavailable if the GPU memory object is not a real allocation.
+    virtual Result Unmap() = 0;
+
+#if PAL_KMT_BUILD || PAL_AMDGPU_BUILD
+    /// Returns an OS-specific handle which can be used to refer to this GPU memory object across processes. This will
+    /// return a null or invalid handle if the object was not created with the @ref interprocess create flag set.
+    ///
+    /// @note This function is only available for KMT builds (PAL_KMT_BUILD) or Linux builds (PAL_AMDGPU_BUILD).
+    ///
+    /// @param [in] exportInfo       Parameters describing how the handle should be exported.
+    ///
+    /// @returns An OS-specific handle which can be used to access the GPU memory object across processes.
+    virtual OsExternalHandle ExportExternalHandle(const GpuMemoryExportInfo& exportInfo) const = 0;
+#endif
+
+    /// Returns a structure containing some fundamental information that describes this GPU memory object.
+    ///
+    /// @returns A reference to this allocation's GpuMemoryDesc.
+    const GpuMemoryDesc& Desc() const { return m_desc; }
+
+    /// Returns the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @returns Pointer to client data.
+    void* GetClientData() const
+    {
+        return m_pClientData;
+    }
+
+    /// Sets the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @param  [in]    pClientData     A pointer to arbitrary client data.
+    void SetClientData(
+        void* pClientData)
+    {
+        m_pClientData = pClientData;
+    }
+
+    /// Set SDI remote surface bus address and marker bus address.
+    ///
+    /// This GPU memory object must have been created with the sdiExternal flag set and with the GpuMemoryCreateInfo
+    /// surfaceBusAddr and markerBusAddr fields both set to zero. This function allows clients to defer setting those
+    /// addresses until after creation. It must be called exactly once to permanently bind the given SDI addresses to
+    /// this GPU memory object.
+    ///
+    /// @warning An sdiExternal GPU memory object is not complete until it is given its SDI addresses! The gpuVirtAddr
+    ///          field in this GPU memory's GpuMemoryDesc will not be valid until this function is called!
+    ///
+    /// @param [in] surfaceBusAddr Surface bus address of Bus Addressable Memory.
+    /// @param [in] markerBusAddr  Marker bus address of Bus Addressable Memory. The client can write to the marker
+    ///                            and have the GPU wait until a value is written to marker before continuing.
+    ///
+    /// @returns Success if succeeded. Otherwise, one of the following errors may be returned:
+    ///          + ErrorUnavailable if the GPU memory object is not external physical memory or it has already been set.
+    ///          + ErrorInvalidValue if one of the input params is 0.
+    ///          + One of the escape call failure errors.
+    virtual Result SetSdiRemoteBusAddress(gpusize surfaceBusAddr, gpusize markerBusAddr) = 0;
+
+protected:
+    /// @internal Constructor.  Zero-initializes m_desc so Desc() never exposes indeterminate data.  Prevents use of the
+    /// new operator on this interface; clients must create objects by explicitly calling the proper create method.
+    IGpuMemory() : m_desc{}, m_pClientData(nullptr) {}
+
+    /// @internal Destructor.  Prevent use of delete operator on this interface.  Client must destroy objects by
+    /// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
+    /// object on their own.
+    virtual ~IGpuMemory() { }
+
+    GpuMemoryDesc m_desc; ///< Information that describes this GPU memory object; returned by Desc().
+
+private:
+    /// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
+    /// and set via SetClientData().
+    /// For non-top-layer objects, this will point to the layer above the current object.
+    void* m_pClientData;
+};
+
+} // Pal
@@ -0,0 +1,139 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palGpuMemoryBindable.h
+ * @brief Defines the Platform Abstraction Library (PAL) IGpuMemoryBindable interface and related types.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palDestroyable.h"
+
+namespace Pal
+{
+
+// Forward declarations.
+class IGpuMemory;
+
+/// Reports the properties required of a GPU memory object that is bound to a specific object.  The client must query
+/// these properties via IGpuMemoryBindable::GetGpuMemoryRequirements() and bind an @ref IGpuMemory object matching
+/// these requirements to the @ref IGpuMemoryBindable object using IGpuMemoryBindable::BindGpuMemory().
+struct GpuMemoryRequirements
+{
+    union
+    {
+        struct
+        {
+            uint32 cpuAccess : 1;  ///< CPU access is required. If set, the client must not set cpuInvisible in
+                                   ///  GpuMemoryCreateFlags and must provide CPU visible heaps or CPU visible heap
+                                   ///  access mode. If not set, it's strongly recommended to set cpuInvisible.
+            uint32 reserved : 31;  ///< Reserved for future use.
+        };
+        uint32 u32All;             ///< All of the above flags packed as a single 32-bit uint.
+    } flags;                       ///< Flags specifying required GPU memory properties.
+
+    gpusize size;                  ///< Amount of GPU memory required, in bytes.
+    gpusize alignment;             ///< Required GPU memory virtual address alignment, in bytes.
+    uint32  heapCount;             ///< Number of valid entries in heaps[].
+    GpuHeap heaps[GpuHeapCount];   ///< List of allowed heaps for the GPU memory, in order of predicted performance.
+};
+
+/**
+ ***********************************************************************************************************************
+ * @interface IGpuMemoryBindable
+ * @brief     Interface inherited by objects that may require GPU memory be bound to them.
+ *
+ * In the future, PAL may discover a need to allocate GPU memory for a class that currently doesn't require it.  In that
+ * situation, that class will be updated to inherit from IGpuMemoryBindable.  This change would break backward
+ * compatibility and would result in the major interface version being incremented.
+ ***********************************************************************************************************************
+ */
+class IGpuMemoryBindable : public IDestroyable
+{
+public:
+    /// Queries the GPU memory properties required by this object.  The client should query properties with this method,
+    /// create/sub-allocate a memory range matching the requirements, then bind the memory to the object via
+    /// @ref BindGpuMemory().
+    ///
+    /// @note Not all objects may actually need GPU memory, and in that case the memory properties will reflect a 0 size
+    ///       and alignment.
+    ///
+    /// @param [out] pGpuMemReqs Required properties of GPU memory to be bound to this object.  Includes properties like
+    ///                          size, alignment, and allowed heaps.
+    virtual void GetGpuMemoryRequirements(
+        GpuMemoryRequirements* pGpuMemReqs) const = 0;
+
+    /// Binds GPU memory to this object according to the requirements queried via GetGpuMemoryRequirements().
+    ///
+    /// Binding memory to objects other than images automatically initializes the object memory as necessary. Image
+    /// objects used as color or depth-stencil targets have to be explicitly initialized in command buffers using an
+    /// ICmdBuffer::CmdReleaseThenAcquire() command to transition them out of the LayoutUninitializedTarget usage.
+    ///
+    /// Binding memory to an object automatically unbinds any previously bound memory. There is no need to bind null to
+    /// an object to explicitly unbind a previously bound allocation before binding a new allocation.
+    ///
+    /// This call is invalid on objects that have no memory requirements, even if binding null.
+    ///
+    /// @param [in] pGpuMemory GPU memory to be bound.  If null, the previous binding will be released.
+    /// @param [in] offset     Offset into the GPU memory where the object's memory range should begin.  This allows
+    ///                        sub-allocating many objects' GPU memory from the same IGpuMemory object.
+    ///
+    /// @returns Success if the specified GPU memory was successfully bound to the object.  Otherwise, one of the
+    ///          following errors may be returned:
+    ///          + ErrorUnavailable if binding a non-image to a virtual allocation.
+    ///          + ErrorInvalidAlignment if the offset does not match the alignment requirements of the object.
+    ///          + ErrorInvalidMemorySize if the object's required memory size does not fit completely within the given
+    ///            memory object at the specified offset.
+    virtual Result BindGpuMemory(
+        IGpuMemory* pGpuMemory,
+        gpusize     offset) = 0;
+
+    /// Returns the GPU memory object and offset that this object is bound to, or nullptr and 0 if not bound.
+    ///
+    /// @param [out] ppGpuMemory Returns the GPU memory object to the address specified in this pointer.
+    ///                          Returns nullptr if this object is not bound to any GPU memory.
+    /// @param [out] pOffset     Returns the GPU memory offset to the address specified in this pointer.
+    ///                          Returns 0 if this object is not bound to any GPU memory.
+    ///
+    /// @returns Success if the GPU memory and offset were successfully returned. Otherwise, one of the following errors
+    ///          may be returned:
+    ///          + ErrorGpuMemoryNotBound if this object is not bound to any GPU memory.
+    ///          + ErrorInvalidPointer if either ppGpuMemory or pOffset is nullptr.
+    ///          + ErrorUnavailable if binding is not supported in the derived class.
+    virtual Result GetGpuMemory(
+        IGpuMemory** ppGpuMemory,
+        gpusize* pOffset) const = 0;
+
+protected:
+    /// @internal Destructor.  Protected to prevent use of the delete operator on this interface.  Client must destroy
+    /// objects by explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory
+    /// allocated for the object on their own.
+    virtual ~IGpuMemoryBindable() { }
+};
+
+} // Pal
@@ -0,0 +1,983 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palImage.h
+ * @brief Defines the Platform Abstraction Library (PAL) IImage interface and related types.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palGpuMemoryBindable.h"
+
+namespace Pal
+{
+
+// Forward declarations.
+class      IImage;
+class      IPrivateScreen;
+class      IScreen;
+class      ISwapChain;
+enum class CompressionMode : uint32;
+enum class ClientCompressionMode : uint32;
+enum SwizzleMode : uint32;
+struct ImageCopyRegion;
+
+/// When used as the value of the viewFormatCount parameter of image creation, it indicates that all compatible formats
+/// can be used for views of the created image.
+constexpr uint32 AllCompatibleFormats = UINT32_MAX;
+
+/// Specifies the dimensionality of an image (i.e., 1D, 2D, or 3D).
+enum class ImageType : uint32
+{
+    Tex1d = 0x0,  ///< One-dimensional image.
+    Tex2d = 0x1,  ///< Two-dimensional image.
+    Tex3d = 0x2,  ///< Three-dimensional image.
+    Count         ///< Number of enumerators above.
+};
+
+/// Specifies the tiling (address swizzling) to use for an image. When a linear tiled image is mapped its contents will
+/// be laid out in row-major ordering. All other tiling modes require the use of swizzle equations to locate texels.
+enum class ImageTiling : uint32
+{
+    Linear       = 0x0,  ///< Image is laid out in scan-line (row-major) order.
+    Optimal      = 0x1,  ///< Image is laid out in a GPU-optimal order.
+    Standard64Kb = 0x2,  ///< Image is laid out in the cross-IHV, 64KB, standard swizzle tiling.
+    Count                ///< Number of enumerators above.
+};
+
+/// Hints to PAL to identify a preference for how this image is organized. This is a preference setting, and may be
+/// ignored if PAL believes better options exist.
+enum class ImageTilingPattern : uint32
+{
+    Default     = 0x0,  ///< No swizzle mode is preferred.
+    Standard    = 0x1,  ///< Prefer standard swizzle modes.
+    XMajor      = 0x2,  ///< Prefer x-coordinate major swizzle modes.
+    YMajor      = 0x3,  ///< Prefer y-coordinate major swizzle modes.
+    Interleaved = 0x4,  ///< Prefer interleaved coordinate swizzle modes.
+    Count               ///< Number of enumerators above.
+};
+
+/// Hints to PAL to select the appropriate tiling mode for an optimization target.
+enum class TilingOptMode : uint32
+{
+    Balanced     = 0x0,  ///< Balance memory footprint and rendering performance.
+    OptForSpace  = 0x1,  ///< Optimize tiling mode for saving memory footprint.
+    OptForSpeed  = 0x2,  ///< Optimize tiling mode for rendering performance.
+    Count                ///< Number of enumerators above.
+};
+
+/// Image metadata modes.
+enum class MetadataMode : uint16
+{
+    Default = 0,  ///< Default behavior. PAL chooses if metadata should be present or not.
+    ForceEnabled, ///< Optimization Hint: The client would prefer Metadata if possible. Useful for scenarios where
+                  ///  metadata isn't an obvious win and clients can enable based on some heuristic or app-detect.
+    Disabled,     ///< The Image will not contain any compression metadata.
+    FmaskOnly,    ///< The color msaa Image will only contain Cmask/Fmask metadata; this mode is only valid for color
+                  ///  msaa Image. On GPUs with GFX12-style distributed compression (see supportDistributedCompression
+                  ///  flag in @ref DeviceProperties), metadataMode only controls UMD metadata (Hi-Z and Hi-S). On such
+                  ///  GPUs, the FmaskOnly enum is ignored and treated like Default.
+    Count,        ///< Number of enumerators above.
+};
+
+/// Image metadata TC compat modes.
+enum class MetadataTcCompatMode : uint16
+{
+    Default = 0,        ///< Default behavior.  PAL chooses if TC compat should be enabled (if compressed).
+    ForceEnabled,       ///< Optimization Hint:  Tells PAL that the client would prefer that metadata is TC compat.
+    Disabled,           ///< Optimization Hint:  Tells PAL that the client would prefer that metadata is not TC compat.
+    Count,              ///< Number of enumerators above.
+};
+
+/// Image shared metadata support level.
+enum class MetadataSharingLevel : uint32
+{
+    FullExpand  = 0,    ///< The metadata needs to be fully expanded at ownership transition time.
+    ReadOnly    = 1,    ///< The metadata is expected to have read-only usage after the ownership is transitioned.
+    FullOptimal = 2,    ///< The metadata can remain as-is if possible at ownership transition time.
+};
+
+/// Specifies the type of PRT map image being created.
+enum class PrtMapType : uint32
+{
+    None            = 0, ///< This is not an auxiliary image used for PRT plus functionality.
+    Residency       = 1, ///< Image data is really a low-resolution map containing the finest populated LOD
+                         ///  for a particular UV space region.
+    SamplingStatus  = 2, ///< Indicates the validity of a given tile on a per-mip level basis.
+    Count,               ///< Number of enumerators above.
+};
+
+/// Specifies how to interpret a clear color.
+enum class ClearColorType : uint32
+{
+    Uint  = 0, ///< The color is stored as an unsigned integer in RGBA order in u32Color. It will be swizzled and
+               ///  compacted before it is written to memory.
+    Sint  = 1, ///< The color is stored as a signed integer in RGBA order in i32Color. It will be swizzled and
+               ///  compacted before it is written to memory. NOTE(review): ClearColor declares no i32Color - confirm.
+    Float = 2, ///< The color is stored as floating point in RGBA order. It will be swizzled and converted to the
+               ///  appropriate numeric format before it is written to memory.
+    Yuv   = 3, ///< The color is stored as an unsigned integer in YUVA order in u32Color. It will be swizzled and
+               ///  compacted before it is written to memory. The client must clamp the clear color within the
+               ///  valid range, e.g. [0, 255] for 8-bit.
+    Count      ///< Number of enumerators above.
+};
+
+/// Contains everything necessary to store and interpret a clear color.
+struct ClearColor
+{
+    ClearColorType type;                   ///< How to interpret this clear color.
+    uint8 disabledChannelMask;             ///< These 4 bits are used to selectively disable the A,B,G,R channels
+                                           ///  from being written. 0 means write ABGR. 0xF means write nothing.
+                                           ///  0x8 means write Blue, Green, Red. 0x7 means write Alpha. etc...
+
+    union
+    {
+        uint32 u32Color[4]; ///< The clear color, interpreted as four unsigned integers.
+        float  f32Color[4]; ///< The clear color, interpreted as four floating point values.
+    };
+};
+
+/// Specifies a set of image creation flags.
+union ImageCreateFlags
+{
+    struct
+    {
+        uint32 invariant               :  1; ///< Images with this flag set and all other creation identical are
+                                             ///  guaranteed to have a consistent data layout.
+        uint32 cloneable               :  1; ///< Image is valid as a source or destination of a clone operation.
+                                             ///  See @ref IDevice::ImagePrefersCloneCopy() for more details.
+        uint32 shareable               :  1; ///< Image can be shared between compatible devices.
+        uint32 presentable             :  1; ///< Indicates this image can be used in presents.
+        uint32 flippable               :  1; ///< Image can be used for flip presents.
+        uint32 stereo                  :  1; ///< Indicates AMD quad buffer stereo extension (AQBS extension) image.
+        uint32 dxgiStereo              :  1; ///< Indicates DXGI stereo (Win8 stereo) image.
+        uint32 cubemap                 :  1; ///< Image will be used as a cubemap.
+        uint32 prt                     :  1; ///< Image is a partially resident texture (aka, sparse image or tiled
+                                             ///  resource).
+        uint32 needSwizzleEqs          :  1; ///< Image requires valid swizzle equations.
+        uint32 perSubresInit           :  1; ///< The image may have its subresources initialized independently using
+                                             ///  barrier calls out of the uninitialized layout.
+        uint32 separateDepthPlaneInit  :  1; ///< If set, the caller may transition the stencil and depth planes from
+                                             ///  "Uninitialized" state at any time.  Otherwise, both planes must be
+                                             ///  transitioned in the same barrier call.  Only meaningful if
+                                             ///  "perSubresInit" is set.
+        uint32 repetitiveResolve       :  1; ///< Optimization: Is this image resolved multiple times to an image which
+                                             ///  is mostly similar to this image?
+        uint32 preferSwizzleEqs        :  1; ///< Image prefers valid swizzle equations, but an invalid swizzle
+                                             ///  equation is also acceptable.
+        uint32 fixedTileSwizzle        :  1; ///< Fix this image's tile swizzle to ImageCreateInfo::tileSwizzle. This
+                                             ///  is only supported for single-sampled color images.
+        uint32 videoReferenceOnly      :  1; ///< Image is used by video hardware for reference buffer only.
+                                             ///  It uses a different tiling format than the decoder output buffer.
+        uint32 optimalShareable        :  1; ///< Indicates metadata information is to be added into private data on
+                                             ///  creation time and honored on open time.
+        uint32 sampleLocsAlwaysKnown   :  1; ///< Sample pattern is always known in client driver for MSAA depth image.
+        uint32 fullResolveDstOnly      :  1; ///< Indicates any ICmdBuffer::CmdResolveImage using this image as a
+                                             ///  destination will overwrite the entire image (width and height of
+                                             ///  resolve region is same as width and height of resolve dst).
+        uint32 fullCopyDstOnly         :  1; ///< Indicates any copy to this image will overwrite the entire image.
+                                             ///  A perf optimization of using post-copy metadata fixup to replace heavy
+                                             ///  expand at barrier to LayoutCopyDst. Unsafe to enable it if there is
+                                             ///  potential partial copy to the image.
+        uint32 pipSwapChain            :  1; ///< Indicates this image is PIP swap-chain. It is only supported on
+                                             ///  Windows platforms.
+        uint32 view3dAs2dArray         :  1; ///< If set, client can view 3D image as 2D with its depth as array slices.
+                                             ///  Note that not all 3D images support it. The image creation will
+                                             ///  return error if we fail to create a compatible image.
+
+        uint32 tmzProtected            :  1; ///< Indicates whether this image is protected or not.
+        uint32 sharedWithMesa          :  1; ///< Indicates this image was opened from a Mesa shared image.
+        uint32 enable256KBSwizzleModes :  1; ///< Enable 256 KiB swizzle modes.
+        uint32 hasModifier             :  1; ///< Set if the image uses drm format modifier.
+        uint32 disableDccStateTracking :  1; ///< Disable a PAL optimization which is commonly broken by app bugs.
+                                             ///  Setting this flag may increase DCC decompress overhead.
+#if PAL_CLIENT_EXAMPLE
+        uint32 useFixedSwizzleMode     :  1; ///< If set, require the fixed swizzle mode provided.
+                                             ///  Fails creation on incompatible swizzles.
+#else
+        uint32 reservedSwMode          :  1; ///< Reserved for future use.
+#endif
+        uint32 reserved                :  4; ///< Reserved for future use.
+    };
+    uint32 u32All;                           ///< Flags packed as 32-bit uint.
+};
+
+/// Specifies a set of ways an image might be used by the GPU (color target, shader read, etc.).
+union ImageUsageFlags
+{
+    struct
+    {
+        uint32 shaderRead             :  1; ///< Image will be read from shader (i.e., texture).
+        uint32 shaderWrite            :  1; ///< Image will be written from a shader (i.e., UAV).
+        uint32 resolveSrc             :  1; ///< Image will be used as resolve source image.
+        uint32 resolveDst             :  1; ///< Image will be used as resolve dst image.
+        uint32 colorTarget            :  1; ///< Image will be bound as a color target.
+        uint32 depthStencil           :  1; ///< Image will be bound as a depth/stencil target.
+        uint32 noStencilShaderRead    :  1; ///< Image will be neither read as stencil nor resolved on stencil plane.
+                                            ///  Note that if resolveSrc bit has been set to indicate that the image
+                                            ///  could be adopted as a resolveSrc image and there could be stencil
+                                            ///  resolve, noStencilShaderRead must be set to 0, since shader-read
+                                            ///  based stencil resolve might be performed.
+        uint32 hiZNeverInvalid        :  1; ///< Hint to PAL indicating the client will guarantee that no operations
+                                            ///  performed on this Image while it is in a decompressed state will cause
+                                            ///  Hi-Z metadata to become invalid. This allows PAL to avoid an expensive
+                                            ///  resummarization blit in some resource barriers.
+        uint32 depthAsZ24             :  1; ///< Use a 24-bit format for HW programming of a native 32-bit surface.
+                                            ///  If set, border color and Z-reference values are treated as Z-24.
+        uint32 firstShaderWritableMip :  4; ///< Only relevant if the shaderWrite flag is set. Typically set to 0 so
+                                            ///  entire image is writable. If nonzero, such as an image where only
+                                            ///  level0 is used as a color target and compute is used to generate
+                                            ///  mipmaps, PAL may be able to enable additional compression on the
+                                            ///  baseLevels which are used exclusively as color target and shader read.
+        uint32 cornerSampling         :  1; ///< Set if this image will use corner sampling in image-read scenarios.
+                                            ///  With corner sampling, the extent refers to the number of pixel corners
+                                            ///  which will be one more than the number of pixels.  Border color is
+                                            ///  ignored when corner sampling is enabled.
+
+        uint32 vrsDepth               :  1; ///< Set if this depth image will be bound when VRS rendering is enabled.
+        uint32 disableOptimizedDisplay:  1; ///< Do not create Display Dcc.
+        uint32 useLossy               :  1; ///< Set if this image may use lossy compression.
+        uint32 stencilOnlyTarget      :  1; ///< This must be set if a stencil-only IDepthStencilView will be created
+                                            ///  for this image.
+        uint32 vrsRateImage           :  1; ///< This image is potentially used with CmdBindSampleRateImage.
+        uint32 videoDecoder           :  1; ///< Indicates this image is a video decoder target.
+        uint32 reserved               : 12; ///< Reserved for future use.
+    };
+    uint32 u32All;                          ///< Flags packed as 32-bit uint.
+};
+
+/// Specifies properties for @ref IImage creation.  Input structure to IDevice::CreateImage().
+///
+/// Note that by default PAL may instruct the hardware to swizzle the contents of an image in memory; if this occurs
+/// two images created with identical properties will not map their texels to the same offsets in GPU memory and may
+/// even have different sizes. At the expense of performance this behavior can be limited by setting the invariant flag,
+/// which guarantees that images with identical properties will have identical GPU memory layouts.
+///
+/// For single-sampled color images, there is a middle ground between these two modes. If the fixedTileSwizzle flag is
+/// set, PAL will use the tileSwizzle property instead of generating its own swizzle value. The tileSwizzle value must
+/// be obtained from the base subresource of a single-sampled color image with identical properties (excluding
+/// fixedTileSwizzle and tileSwizzle). This allows the client to force certain similar images to share the same GPU
+/// memory layouts without forcing all similar images to a single GPU memory layout.
+struct ImageCreateInfo
+{
+    ImageCreateFlags   flags;            ///< Image creation flags.
+    ImageUsageFlags    usageFlags;       ///< Image usage flags.
+    ImageType          imageType;        ///< Dimensionality of image (1D/2D/3D).
+    SwizzledFormat     swizzledFormat;   ///< Pixel format and channel swizzle.
+    Extent3d           extent;           ///< Dimensions in pixels WxHxD.
+    uint32             mipLevels;        ///< Number of mipmap levels.  Cannot be 0.
+    uint32             arraySize;        ///< Number of slices.  Set to 1 for non-array images.
+    uint32             samples;          ///< Number of coverage samples.  Set to 1 for single sample images.  Must be
+                                         ///  greater than or equal to the number of fragments.
+    uint32             fragments;        ///< Number of color/depth fragments.  Set to 1 for single sample images.
+    ImageTiling        tiling;           ///< Controls layout of pixels in the image.
+    ImageTilingPattern tilingPreference; ///< Controls preferred tile swizzle organization for this image.
+    TilingOptMode      tilingOptMode;    ///< Hints to PAL to select the appropriate tiling mode.
+    uint32             tileSwizzle;      ///< If fixedTileSwizzle is set, use this value for the image's base swizzle.
+#if PAL_CLIENT_EXAMPLE
+    SwizzleMode        fixedSwizzleMode; ///< For directed image tests, force a particular swizzle mode.
+#endif
+
+    /// Metadata behavior mode for this image. On GPUs with GFX12-style distributed compression
+    /// (see supportDistributedCompression flag in @ref DeviceProperties), metadataMode only controls UMD metadata
+    /// (Hi-Z and Hi-S). On such GPUs, the FmaskOnly enum is ignored and treated like Default.
+    MetadataMode         metadataMode;
+    MetadataTcCompatMode metadataTcCompatMode; ///< TC compat mode for this image.
+
+    /// Distributed compression contains GL2/DF DCC compression and RB backend client compression which includes
+    /// fragment client compression (previous FMASK compression alike) on color MSAA images and Z Plane client
+    /// compression on depth stencil images. Only relevant if the backing memory pages enable compression, controllable
+    /// by client with @ref GpuMemoryCreateInfo::compression.
+    CompressionMode compressionMode;
+
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 876
+    /// Client compression is part of distributed compression (aka physical compression); it can only be enabled if
+    /// physical compression is enabled.
+    ///
+    /// On Gfx12, controls (legacy FMask based) color fragment compression and Z plane compression.
+    ClientCompressionMode clientCompressionMode; ///< Controls client compression behavior for this resource.
+#else
+    TriState              clientCompressionMode; ///< Controls client compression behavior for this resource.
+#endif
+
+    uint32 maxBaseAlign;      ///< Maximum address alignment for this image or zero for an unbounded alignment.
+    float  imageMemoryBudget; ///< The memoryBudget value used in SW addrlib to determine the minSizeBlk for textures.
+                              ///  It must be >= 0.0. When in [0.0, 1.0) addrlib uses legacy logic to decide minSizeBlk.
+                              ///  When == 1.0, addrlib uses minimizeAlign. When > 1.0, addrlib applies memory budget
+                              ///  algorithm. Although tests with 1.5 show significant texture allocation size savings,
+                              ///  the default value 0.0 (legacy behavior) is recommended if not specified by client.
+
+    struct
+    {
+        PrtMapType mapType;   ///< Indicates what sort of PRT metadata is stored in this image. If this image is PRT
+                              ///  metadata, then it can only be associated with an image that is a power-of-two
+                              ///  multiple bigger (or the same size). Image properties need to include
+                              ///  "PrtFeaturePrtPlus" to create PRT map images. Format must be set to X8_Unorm for
+                              ///  residency map and sampling-status map types.
+        Extent3d   lodRegion; ///< Useful only if mapType is not "none".  Defines the region size of the parent image
+                              ///  that one pixel of this image matches with.  The map image can only be paired with a
+                              ///  parent image of matching dimensions. This parameter can be left at zero.
+    } prtPlus;
+
+    /// The following "pitch" members must be zeroed unless the client is creating a @ref ImageTiling::Linear image and
+    /// wishes to directly specify the image's row and depth pitches.  In that case, they must be integer multiples of
+    /// the alignments given by @ref IDevice::GetLinearImageAlignments, called with an appropriate maxElementSize.
+    uint32   rowPitch;    ///< The image must have this row pitch for the first mip level (in bytes).
+    uint32   depthPitch;  ///< The image must have this depth pitch for the first mip level (in bytes).
+
+    Rational refreshRate; ///< The expected refresh rate when presenting this flippable or stereo image.
+
+    /// By default an image can only be used with image views that exactly match @ref swizzledFormat (the base format).
+    /// If the client wishes to create image views with other formats they must fill out the following fields.
+    ///
+    /// Valid combinations of these fields include:
+    /// - pViewFormats = nullptr, viewFormatCount = 0: all image views must use the base format.
+    /// - pViewFormats = nullptr, viewFormatCount = AllCompatibleFormats: image views can use any compatible formats.
+    /// - Otherwise pViewFormats is an array of additional image view formats (excluding the base format) and
+    ///   viewFormatCount is the length of that array.
+    ///
+    /// @warning pViewFormats is consumed at image creation time and should not be accessed afterwards through
+    ///          @ref GetImageCreateInfo.
+    uint32                viewFormatCount; ///< Must be 0, AllCompatibleFormats, or the length of pViewFormats.
+    const SwizzledFormat* pViewFormats;    ///< See the block comment above for a full description.
+
+#if defined(__unix__)
+    uint64  modifier;                     ///< Drm format modifier. Ignored if flags.hasModifier is unset.
+    uint32  modifierPlaneCount;           ///< Number of memory planes of drm format modifier.
+    gpusize modifierMemoryPlaneOffset[3]; ///< Offset of main surface, display Dcc surface and gfx Dcc surface.
+#endif
+};
+
+/// Compares two image creation descriptions for equality.
+///
+/// The scalar members listed below are compared directly.  The DRM format modifier members (unix builds only) are
+/// compared only when flags.hasModifier is set, and the contents of the pViewFormats arrays are compared only when
+/// viewFormatCount is an explicit array length (neither 0 nor AllCompatibleFormats).
+///
+/// @note Although this function is declared constexpr, any path that reaches memcmp() cannot be evaluated at compile
+///       time.
+///
+/// @param [in] lhs  First image creation description.
+/// @param [in] rhs  Second image creation description.
+///
+/// @returns True if all compared members match, false otherwise.
+inline constexpr bool operator==(const ImageCreateInfo& lhs, const ImageCreateInfo& rhs)
+{
+    bool same = (lhs.flags.u32All            == rhs.flags.u32All)            &&
+                (lhs.usageFlags.u32All       == rhs.usageFlags.u32All)       &&
+                (lhs.imageType               == rhs.imageType)               &&
+                (lhs.swizzledFormat          == rhs.swizzledFormat)          &&
+                (lhs.extent                  == rhs.extent)                  &&
+                (lhs.mipLevels               == rhs.mipLevels)               &&
+                (lhs.arraySize               == rhs.arraySize)               &&
+                (lhs.samples                 == rhs.samples)                 &&
+                (lhs.fragments               == rhs.fragments)               &&
+                (lhs.tiling                  == rhs.tiling)                  &&
+                (lhs.tilingPreference        == rhs.tilingPreference)        &&
+                (lhs.tilingOptMode           == rhs.tilingOptMode)           &&
+                (lhs.tileSwizzle             == rhs.tileSwizzle)             &&
+#if PAL_CLIENT_EXAMPLE
+                (lhs.fixedSwizzleMode        == rhs.fixedSwizzleMode)        &&
+#endif
+                (lhs.metadataMode            == rhs.metadataMode)            &&
+                (lhs.metadataTcCompatMode    == rhs.metadataTcCompatMode)    &&
+                (lhs.compressionMode         == rhs.compressionMode)         &&
+                (lhs.clientCompressionMode   == rhs.clientCompressionMode)   &&
+                (lhs.maxBaseAlign            == rhs.maxBaseAlign)            &&
+                (lhs.imageMemoryBudget       == rhs.imageMemoryBudget)       &&
+                (lhs.prtPlus.mapType         == rhs.prtPlus.mapType)         &&
+                (lhs.prtPlus.lodRegion       == rhs.prtPlus.lodRegion)       &&
+                (lhs.rowPitch                == rhs.rowPitch)                &&
+                (lhs.depthPitch              == rhs.depthPitch)              &&
+                (lhs.refreshRate.numerator   == rhs.refreshRate.numerator)   &&
+                (lhs.refreshRate.denominator == rhs.refreshRate.denominator) &&
+                (lhs.viewFormatCount         == rhs.viewFormatCount)         &&
+                true;
+
+#if defined(__unix__)
+    // flags.u32All already matched above, so checking hasModifier on lhs alone is sufficient.
+    if (same && (lhs.flags.hasModifier != 0))
+    {
+        // Clamp the number of compared planes to the capacity of modifierMemoryPlaneOffset[] so that an out-of-range
+        // modifierPlaneCount can never make memcmp() read past the end of the arrays.
+        constexpr uint32 MaxModifierPlanes =
+            sizeof(ImageCreateInfo::modifierMemoryPlaneOffset) / sizeof(gpusize);
+        const uint32 planesToCompare =
+            (lhs.modifierPlaneCount < MaxModifierPlanes) ? lhs.modifierPlaneCount : MaxModifierPlanes;
+
+        same = (lhs.modifier           == rhs.modifier) &&
+               (lhs.modifierPlaneCount == rhs.modifierPlaneCount) &&
+               (memcmp(&lhs.modifierMemoryPlaneOffset[0],
+                       &rhs.modifierMemoryPlaneOffset[0],
+                       sizeof(gpusize) * planesToCompare) == 0);
+    }
+#endif
+
+    // viewFormatCount already matched above, so lhs.viewFormatCount describes both arrays.
+    if (same && (lhs.viewFormatCount > 0) && (lhs.viewFormatCount != AllCompatibleFormats))
+    {
+        if ((lhs.pViewFormats == nullptr) || (rhs.pViewFormats == nullptr))
+        {
+            // Passing a null pointer to memcmp() is undefined behavior; treat the arrays as equal only if both are
+            // missing.
+            same = (lhs.pViewFormats == rhs.pViewFormats);
+        }
+        else if (lhs.pViewFormats != rhs.pViewFormats)
+        {
+            same = (memcmp(lhs.pViewFormats, rhs.pViewFormats, lhs.viewFormatCount * sizeof(SwizzledFormat)) == 0);
+        }
+        // Otherwise both sides reference the very same array, which trivially compares equal.
+    }
+
+    return same;
+}
+
+/// Specifies properties for presentable @ref IImage creation.  Input structure to IDevice::CreatePresentableImage().
+struct PresentableImageCreateInfo
+{
+    union
+    {
+        struct
+        {
+            uint32 fullscreen   :  1;   ///< Image supports fullscreen presentation.
+            uint32 stereo       :  1;   ///< Image supports stereoscopic rendering and display.
+                                        ///  Implies an array size of 2. Fullscreen must be set.
+            uint32 turbosync    :  1;   ///< Image supports turbosync flip.
+            uint32 peerWritable :  1;   ///< Indicates if the memory allocated will be writable by other devices.
+            uint32 tmzProtected :  1;   ///< Indicates this presentable image's memory is TMZ protected.
+#if PAL_AMDGPU_BUILD
+            uint32 initializeToZero :  1; ///< If set, PAL will request that the host OS zero-initializes
+                                          ///  the allocation upon creation. Currently, only GpuHeapLocal and
+                                          ///  GpuHeapInvisible are supported.
+#else
+            uint32 placeholder0     :  1; ///< Placeholder that keeps the following bits at the same positions.
+#endif
+            uint32 enable256KBSwizzleModes :  1; ///< Enable 256 KiB swizzle modes.
+            uint32 reserved                : 25; ///< Reserved for future use.
+        };
+        uint32 u32All;               ///< Flags packed as 32-bit uint.
+    } flags;                         ///< Presentable image creation flags.
+
+    SwizzledFormat  swizzledFormat;  ///< Pixel format and channel swizzle.
+    ImageUsageFlags usage;           ///< Image usage flags.
+    Extent2d        extent;          ///< Width/height of the image.
+    CompressionMode compressionMode; ///< Specify GFX12-style distributed compression behavior for this resource.
+                                     ///  Only relevant if the backing memory pages enable compression (controllable by
+                                     ///  client with the distributedCompression field in @ref GpuMemoryCreateInfo).
+    const IScreen*  pScreen;         ///< Target screen for fullscreen presentable images. Can be null if the fullscreen
+                                     ///  flag is 0.
+    OsDisplayHandle hDisplay;        ///< Display handle of the local display system only for WSI.
+    OsWindowHandle  hWindow;         ///< Window handle only for WSI.
+    ISwapChain*     pSwapChain;      ///< SwapChain object which the presentable image belongs to.
+
+    /// By default an image can only be used with image views that exactly match @ref swizzledFormat (the base format).
+    /// If the client wishes to create image views with other formats they must fill out the following fields.
+    ///
+    /// Valid combinations of these fields include:
+    /// - pViewFormats = nullptr, viewFormatCount = 0: all image views must use the base format.
+    /// - pViewFormats = nullptr, viewFormatCount = AllCompatibleFormats: image views can use any compatible formats.
+    /// - Otherwise pViewFormats is an array of additional image view formats (excluding the base format) and
+    ///   viewFormatCount is the length of that array.
+    ///
+    /// @warning pViewFormats is consumed at image creation time and should not be accessed afterwards through
+    ///          @ref GetImageCreateInfo.
+    uint32                viewFormatCount; ///< Must be 0, AllCompatibleFormats, or the length of pViewFormats.
+    const SwizzledFormat* pViewFormats;    ///< See the block comment above for a full description.
+};
+
+/// Specifies properties for private screen @ref IImage creation.  Input structure to
+/// IDevice::CreatePrivateScreenImage().
+struct PrivateScreenImageCreateInfo
+{
+    union
+    {
+        struct
+        {
+            uint32 invariant       :  1; ///< Images with this flag set and all other creation parameters identical
+                                         ///  are guaranteed to have a consistent data layout.
+            uint32 reserved        : 31; ///< Reserved for future use.
+        };
+        uint32 u32All;                 ///< Flags packed as 32-bit uint.
+    } flags;                           ///< Private screen image creation flags.
+
+    SwizzledFormat  swizzledFormat; ///< Pixel format and channel swizzle.
+    ImageUsageFlags usage;          ///< Image usage flags.
+    Extent2d        extent;         ///< Width/height of the image.
+    IPrivateScreen* pScreen;        ///< Private screen this image is created on (this image can then be presented
+                                    ///  on this private screen).
+    CompressionMode compressionMode;///< Specify GFX12-style distributed compression behavior for this resource.
+                                    ///  Only relevant if the backing memory pages enable compression (controllable by
+                                    ///  client with the distributedCompression field in @ref GpuMemoryCreateInfo).
+
+    /// By default an image can only be used with image views that exactly match @ref swizzledFormat (the base format).
+    /// If the client wishes to create image views with other formats they must fill out the following fields.
+    ///
+    /// Valid combinations of these fields include:
+    /// - pViewFormats = nullptr, viewFormatCount = 0: all image views must use the base format.
+    /// - pViewFormats = nullptr, viewFormatCount = AllCompatibleFormats: image views can use any compatible formats.
+    /// - Otherwise pViewFormats is an array of additional image view formats (excluding the base format) and
+    ///   viewFormatCount is the length of that array.
+    ///
+    /// @warning pViewFormats is consumed at image creation time and should not be accessed afterwards through
+    ///          @ref GetImageCreateInfo.
+    uint32                viewFormatCount; ///< Must be 0, AllCompatibleFormats, or the length of pViewFormats.
+    const SwizzledFormat* pViewFormats;    ///< See the block comment above for a full description.
+};
+
+/// Specifies parameters for opening another device's image for peer access from this device.  Input structure to
+/// IDevice::OpenPeerImage().
+struct PeerImageOpenInfo
+{
+    const IImage* pOriginalImage;  ///< The other device's image that is to be opened for peer access.
+};
+
+/// Specifies parameters for opening another non-PAL device's image for access from this device.  Input structure to
+/// IDevice::OpenExternalSharedImage().
+struct ExternalImageOpenInfo
+{
+    ExternalResourceOpenInfo resourceInfo;   ///< Information describing the external image.
+    Extent3d                 extent;         ///< Expected extent for the external image. If any dimension of this
+                                             ///  reference extent is zero, the reference value is ignored and the
+                                             ///  extents from the shared metadata are used instead.
+    SwizzledFormat           swizzledFormat; ///< Pixel format and channel swizzle. Or UndefinedFormat to infer the
+                                             ///  format internally.
+    ImageCreateFlags         flags;          ///< Image creation flags.
+    ImageUsageFlags          usage;          ///< Image usage flags.
+    IPrivateScreen*          pScreen;        ///< Private screen this image is created on, or null.
+    gpusize                  gpuMemOffset;   ///< GpuMemory offset.
+#if defined(__unix__)
+    gpusize                  dccOffset;          ///< Offset of gfx Dcc surface if nonzero.
+    gpusize                  displayDccOffset;   ///< Offset of display Dcc surface if nonzero.
+    uint64                   modifier;           ///< Drm format modifier, if flags.hasModifier is set.
+    uint32                   modifierPlaneCount; ///< Number of memory planes of drm format modifier.
+#endif
+    /// The following members must be set to zero unless the client is opening a @ref ImageTiling::Linear image with
+    /// specified row and depth pitches. In that case, they must be integer multiples of the alignments given by
+    /// @ref IDevice::GetLinearImageAlignments, called with an appropriate maxElementSize.
+    gpusize                  rowPitch;        ///< Offset in bytes between the same X position on two consecutive lines
+                                              ///  of the subresource.
+    gpusize                  depthPitch;      ///< Offset in bytes between the same X,Y position of two consecutive
+                                              ///  slices.
+};
+
+/// Reports the overall GPU memory layout of the entire image.  Output structure for IImage::GetMemoryLayout(). Unused
+/// sections will have a size of zero, an offset of zero, and an alignment of one. The layout is split into:
+///       + Image Data: The raw texel values for all subresources of the image.
+///       + Image Metadata: Additional data that will be used to optimize GPU operations that access the image.
+///       + Image Metadata Header: A special subsection of the metadata for small bits of data with weaker alignment.
+struct ImageMemoryLayout
+{
+    gpusize     dataSize;                    ///< The size, in bytes, of the image's core data section.
+    gpusize     dataAlignment;               ///< The alignment, in bytes, of the image's core data section.
+
+    gpusize     metadataOffset;              ///< The offset, in bytes, of the image's metadata section.
+    gpusize     metadataSize;                ///< The size, in bytes, of the image's metadata section.
+    gpusize     metadataAlignment;           ///< The alignment, in bytes, of the image's metadata section.
+
+    gpusize     metadataHeaderOffset;        ///< The offset, in bytes, of the image's metadata header.
+    gpusize     metadataHeaderSize;          ///< The size, in bytes, of the image's metadata header.
+    gpusize     metadataHeaderAlignment;     ///< The alignment, in bytes, of the image's metadata header.
+
+    uint8       swizzleEqIndices[2];         ///< Which swizzle equations this image uses or InvalidSwizzleEqIndex if
+                                             ///  there are no swizzle equations for this image's layout.
+    uint8       swizzleEqTransitionMip;      ///< Before this mip level, the image uses swizzleEqIndices[0]; from this
+                                             ///  mip level onwards, the image uses swizzleEqIndices[1].
+    uint8       swizzleEqTransitionPlane;    ///< Before this plane, the image uses swizzleEqIndices[0]; from this
+                                             ///  plane onwards, the image uses swizzleEqIndices[1].
+
+    uint32      prtTileWidth;                ///< Width, in texels, of a PRT tile.
+    uint32      prtTileHeight;               ///< Height, in texels, of a PRT tile.
+    uint32      prtTileDepth;                ///< Depth, in texels, of a PRT tile.
+    uint32      prtMinPackedLod;             ///< First mip level that is packed into the PRT mip tail.
+    uint32      prtMipTailTileCount;         ///< Number of tiles in the packed mip tail. This may either indicate the
+                                             ///  size per slice or per image depending on the support for
+                                             ///  PrtFeaturePerLayerMipTail (@see PrtFeatureFlags).
+    uint32      stereoLineOffset;            ///< Y offset to the right eye data, in texels.
+};
+
+/// Collection of bitmasks specifying which operations are currently allowed on an image, and which queues are allowed
+/// to perform those operations.  Based on this information, PAL can determine the best compression state of the image.
+struct ImageLayout
+{
+    uint32 usages  : 24;  ///< Bitmask of @ref ImageLayoutUsageFlags values (24 bits wide).
+    uint32 engines :  8;  ///< Bitmask of @ref ImageLayoutEngineFlags values (8 bits wide).
+};
+
+/**
+****************************************************************************************************
+* @brief
+*   Enumerates swizzle modes usable on any supported GPU.
+* @note
+*   For details please check _AddrSwizzleMode.
+*
+****************************************************************************************************
+*/
+enum SwizzleMode : uint32
+{
+    SwizzleModeLinear = 0,
+    SwizzleMode256BS,
+    SwizzleMode256BD,
+    SwizzleMode256BR,
+    SwizzleMode4KbZ,
+    SwizzleMode4KbS,
+    SwizzleMode4KbD,
+    SwizzleMode4KbR,
+    SwizzleMode64KbZ,
+    SwizzleMode64KbS,
+    SwizzleMode64KbD,
+    SwizzleMode64KbR,
+    SwizzleMode64KbZT,
+    SwizzleMode64KbST,
+    SwizzleMode64KbDT,
+    SwizzleMode64KbRT,
+    SwizzleMode4KbZX,
+    SwizzleMode4KbSX,
+    SwizzleMode4KbDX,
+    SwizzleMode4KbRX,
+    SwizzleMode64KbZX,
+    SwizzleMode64KbSX,
+    SwizzleMode64KbDX,
+    SwizzleMode64KbRX,
+    SwizzleMode256KbVarZX,
+    SwizzleMode256KbVarSX,
+    SwizzleMode256KbVarDX,
+    SwizzleMode256KbVarRX,
+
+    // The meaning of a swizzle mode varies by hardware generation; do not compare directly with the
+    // _Z / _S / _D / _R types.  NOTE(review): presumably this applies to the enumerators that follow - confirm.
+    SwizzleMode256B2D,
+    SwizzleMode4Kb2D,
+    SwizzleMode4Kb3D,
+    SwizzleMode64Kb2D,
+    SwizzleMode64Kb3D,
+    SwizzleMode256Kb2D,
+    SwizzleMode256Kb3D,
+    SwizzleMode64Kb2Dz,
+    SwizzleMode256Kb2Dz,
+    SwizzleModeCount,   // Comes last, so its value equals the number of enumerators declared before it.
+};
+
+/// Reports position and memory layout information for a specific subresource in an image.  Output structure for
+/// IImage::GetSubresourceLayout().
+struct SubresLayout
+{
+    uint32   elementBytes;  ///< Size of each element in bytes.
+    gpusize  offset;        ///< Offset in bytes from the base of the image's GPU memory where the subresource starts.
+    gpusize  swizzleOffset; ///< Offset in bytes used for supporting parameterized swizzle.
+    gpusize  size;          ///< Size of the subresource in bytes.
+    gpusize  rowPitch;      ///< Offset in bytes between the same X position on two consecutive lines of the subresource.
+    gpusize  depthPitch;    ///< Offset in bytes between the same X,Y position of two consecutive slices.
+    uint32   tileToken;     ///< Token representing various tiling information necessary for determining compatible
+                            ///  optimally tiled copies.
+    uint32   tileSwizzle;   ///< Bank/Pipe swizzle bits for macro-tiling modes.
+    Extent3d blockSize;     ///< Size of a tile block in texels - micro tile for 1D tiling and macro tile for 2D tiling.
+    Offset3d mipTailCoord;  ///< Coordinates of the subresource within the mip tail.
+
+    Extent3d extentTexels;   ///< Unpadded extent of the subresource in texels.
+    Extent3d extentElements; ///< Unpadded extent of the subresource in elements.
+    Extent3d paddedExtent;   ///< Extent of the subresource in elements, including all internal padding for this subresource.
+
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 912
+    /// Reports supported engines and usages for this subresource while it can remain in its optimal compression state.
+    /// Clients using CmdRelease()/CmdAcquire() without complete knowledge of the application's next usage during
+    /// CmdRelease() or its previous usage at CmdAcquire() can treat this layout as a performant target for an
+    /// intermediate state that will avoid unnecessary decompressions.
+    ///
+    /// This value is only valid if supportSplitReleaseAcquire is set in @ref DeviceProperties.
+    /// This member only exists for client interface versions older than 912.
+    ImageLayout defaultGfxLayout;
+#endif
+
+    SwizzledFormat planeFormat; ///< Swizzled format for the plane. Planar resources like D32-S8
+                                ///  will have a different swizzled format per plane.
+    SwizzleMode swizzleMode;    ///< Swizzle mode for the plane, based on AddrSwizzleMode.
+    uint32 hwSwizzleMode;       ///< Hardware swizzle enum; the enum type depends on the gfx version.
+};
+
+/// Selects a specific subresource of an image resource.
+///
+/// Most images only have a single data plane but in some cases conceptually related data will be stored in physically
+/// separate locations which we call planes.  If an image only has a single plane it will always be plane 0.
+/// We define the following fixed mappings for all multi-plane formats.
+///       + Depth-stencil: if the image format contains depth and stencil data, plane 0 is depth and plane 1 is stencil.
+///       + YUV-planar: if the image format is @ref YuvPlanar it has either two or three planes.  The luma plane
+///         is always plane 0. If the format is @ref ChNumFormat::YV12 it has three planes where plane 1 is the
+///         red-difference chrominance plane and plane 2 is the blue-difference chrominance plane. Otherwise, plane 1
+///         interleaves blue-difference and red-difference chrominance values.
+///
+/// Client interface versions 886 and newer use compact 8-bit/16-bit members; older versions use 32-bit members.
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 886
+struct SubresId
+{
+    uint8  plane;      ///< Selects a data plane.
+    uint8  mipLevel;   ///< Selects a mip level.
+    uint16 arraySlice; ///< Selects an array slice.
+};
+
+/// Defines a range of subresources.
+struct SubresRange
+{
+    SubresId startSubres;  ///< First subresource in the range.
+    uint8    numPlanes;    ///< Number of planes in the range.
+    uint8    numMips;      ///< Number of mip levels in the range.
+    uint16   numSlices;    ///< Number of slices in the range.
+};
+
+#else
+// Layout used by client interface versions older than 886: the same members, each stored as a 32-bit value.
+struct SubresId
+{
+    uint32 plane;      ///< Selects a data plane.
+    uint32 mipLevel;   ///< Selects a mip level.
+    uint32 arraySlice; ///< Selects an array slice.
+};
+
+/// Defines a range of subresources.
+struct SubresRange
+{
+    SubresId startSubres;  ///< First subresource in the range.
+    uint32   numPlanes;    ///< Number of planes in the range.
+    uint32   numMips;      ///< Number of mip levels in the range.
+    uint32   numSlices;    ///< Number of slices in the range.
+};
+
+#endif
+
+/// A variant struct of MemoryImageCopyRegion.
+/// Specifies parameters for a copy from CPU memory to an image.
+/// An input for IImage::CopyMemoryToImage().
+struct MemoryToImageCopyRegion
+{
+    SubresId       imageSubres;         ///< Selects the image subresource.
+    Offset3d       imageOffset;         ///< Pixel offset to the start of the chosen subresource region.
+    Extent3d       imageExtent;         ///< Size of the image region in elements.
+    uint32         numSlices;           ///< Number of slices the copy will span.
+    const void*    pHostPtr;            ///< Pointer to the host memory to copy from (read-only).
+    gpusize        gpuMemoryRowPitch;   ///< Offset in bytes between the same X position on two consecutive lines.
+    gpusize        gpuMemoryDepthPitch; ///< Offset in bytes between the same X,Y position of two consecutive slices.
+};
+
+/// A variant struct of MemoryImageCopyRegion.
+/// Specifies parameters for a copy from an image to CPU memory.
+/// An input for IImage::CopyImageToMemory().
+struct ImageToMemoryCopyRegion
+{
+    SubresId       imageSubres;         ///< Selects the image subresource.
+    Offset3d       imageOffset;         ///< Pixel offset to the start of the chosen subresource region.
+    Extent3d       imageExtent;         ///< Size of the image region in elements.
+    uint32         numSlices;           ///< Number of slices the copy will span.
+    void*          pHostPtr;            ///< Pointer to the host memory to copy to (written by the copy).
+    gpusize        gpuMemoryRowPitch;   ///< Offset in bytes between the same X position on two consecutive lines.
+    gpusize        gpuMemoryDepthPitch; ///< Offset in bytes between the same X,Y position of two consecutive slices.
+};
+
+/// Compares two subresource IDs member by member.
+///
+/// @returns True if the plane, mip level and array slice all match, false otherwise.
+inline constexpr bool operator==(const SubresId& lhs, const SubresId& rhs)
+{
+    const bool samePlane = (lhs.plane      == rhs.plane);
+    const bool sameMip   = (lhs.mipLevel   == rhs.mipLevel);
+    const bool sameSlice = (lhs.arraySlice == rhs.arraySlice);
+
+    return samePlane && sameMip && sameSlice;
+}
+
+/// Inequality counterpart of the SubresId equality operator.
+///
+/// @returns True if the two subresource IDs are not equal, false otherwise.
+inline constexpr bool operator!=(const SubresId& lhs, const SubresId& rhs)
+{
+    const bool equal = (lhs == rhs);
+
+    return !equal;
+}
+
+/// Compares two subresource ranges member by member.
+///
+/// @returns True if both ranges start at the same subresource and span the same number of planes, mip levels and
+///          slices, false otherwise.
+inline constexpr bool operator==(const SubresRange& lhs, const SubresRange& rhs)
+{
+    const bool sameStart  = (lhs.startSubres == rhs.startSubres);
+    const bool sameCounts = (lhs.numPlanes == rhs.numPlanes) &&
+                            (lhs.numMips   == rhs.numMips)   &&
+                            (lhs.numSlices == rhs.numSlices);
+
+    return sameStart && sameCounts;
+}
+
+/// Determines whether two subresource ranges share at least one subresource.
+///
+/// Each range is treated as a half-open interval [start, start + count) along the plane, mip level and array slice
+/// axes.  The ranges overlap only if their intervals intersect on all three axes.
+///
+/// @param [in] a  First subresource range.
+/// @param [in] b  Second subresource range.
+///
+/// @returns True if two subresource ranges are overlapped, false otherwise.
+inline constexpr bool OverlappedSubresRanges(
+    const SubresRange& a,
+    const SubresRange& b)
+{
+    // Two half-open intervals intersect iff each one begins before the other one ends.
+    const bool planesOverlap = (a.startSubres.plane < (b.startSubres.plane + b.numPlanes)) &&
+                               (b.startSubres.plane < (a.startSubres.plane + a.numPlanes));
+
+    const bool mipsOverlap   = (a.startSubres.mipLevel < (b.startSubres.mipLevel + b.numMips)) &&
+                               (b.startSubres.mipLevel < (a.startSubres.mipLevel + a.numMips));
+
+    const bool slicesOverlap = (a.startSubres.arraySlice < (b.startSubres.arraySlice + b.numSlices)) &&
+                               (b.startSubres.arraySlice < (a.startSubres.arraySlice + a.numSlices));
+
+    return planesOverlap && mipsOverlap && slicesOverlap;
+}
+
+/**
+ ***********************************************************************************************************************
+ * @interface IImage
+ * @brief     Represents an image resource that can be accessed by the GPU.
+ *
+ * @see IDevice::CreateImage()
+ * @see IDevice::OpenPeerImage()
+ ***********************************************************************************************************************
+ */
+class IImage : public IGpuMemoryBindable
+{
+public:
+    /// Reports information on the layout of the image in memory such as core data size and metadata alignment.
+    ///
+    /// @returns A reference to the image's ImageMemoryLayout.
+    virtual const ImageMemoryLayout& GetMemoryLayout() const = 0;
+
+    /// Reports information on the full range of the image's subresources.
+    ///
+    /// @param [out] pRange  Reports info on the full range of the image's subresources such as number of mips and
+    ///                      planes.
+    ///
+    /// @returns Success if the range was successfully reported.  Otherwise, one of the following error codes may be
+    ///          returned:
+    ///          + ErrorInvalidPointer if pRange is null.
+    virtual Result GetFullSubresourceRange(SubresRange* pRange) const = 0;
+
+    /// Reports information on the layout of the specified subresource in memory.
+    ///
+    /// @param [in]  subresId Selects a subresource from the image (plane/mip/slice).
+    /// @param [out] pLayout  Reports info on the subresource layout such as size and pitch.
+    ///
+    /// @returns Success if the layout was successfully reported.  Otherwise, one of the following error codes may be
+    ///          returned:
+    ///          + ErrorInvalidPointer if pLayout is null.
+    ///          + ErrorInvalidValue if the subresId is out of range for this image.
+    virtual Result GetSubresourceLayout(
+        SubresId      subresId,
+        SubresLayout* pLayout) const = 0;
+
+#if defined(__unix__)
+    /// Reports information on the memory plane layout of the specified subresource in memory for image with modifier.
+    ///
+    /// @param [in]  memoryPlane Selects a memory plane from the image.
+    /// @param [out] pLayout     Reports info on the subresource layout such as size and pitch.
+    ///
+    /// @returns Success if the layout was successfully reported.  Otherwise, one of the following error codes may be
+    ///          returned:
+    ///          + ErrorInvalidValue if the memory plane is out of range for this image.
+    virtual Result GetModifierSubresourceLayout(
+        uint32        memoryPlane,
+        SubresLayout* pLayout) const = 0;
+#endif
+
+    /// Reports the create info of the image, i.e. the copy of the structure that was passed to the constructor.
+    ///
+    /// @returns A reference to the retained ImageCreateInfo.
+    const ImageCreateInfo& GetImageCreateInfo() const { return m_createInfo; }
+
+    /// Returns the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @returns Pointer to client data.
+    void* GetClientData() const
+    {
+        return m_pClientData;
+    }
+
+    /// Sets the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @param  [in]    pClientData     A pointer to arbitrary client data.
+    void SetClientData(
+        void* pClientData)
+    {
+        m_pClientData = pClientData;
+    }
+
+#if defined(_WIN32)
+    /// This method checks if the image is an opened cross-adapter shared image on MS hybrid graphics system.
+    ///
+    /// @returns True if the image is an opened cross-adapter shared image. False otherwise.
+    virtual bool IsCrossAdapter() const = 0;
+
+    /// Returns a special resource ID. Can be used to associate an opened resource with its original resource.
+    ///
+    /// @returns The optimal sharing ID.
+    virtual uint64 GetOptimalSharingId() const = 0;
+#endif
+
+    /// Sets level of optimal sharing by opening APIs using this optimal sharable image and pass this information to the
+    /// creator. This function is supposed to be called by openers only. The call by creator is ignored.
+    ///
+    /// @param  [in]    level        Level to be set to specified client API.
+    virtual void SetOptimalSharingLevel(
+        MetadataSharingLevel level) = 0;
+
+    /// Returns support level set by all possible opening APIs.
+    ///
+    /// @returns A summarized supporting level.
+    virtual MetadataSharingLevel GetOptimalSharingLevel() const = 0;
+
+    /// Gives the client access to the resource ID used for internal Pal events.
+    /// EX: Resource Create, Resource Bind, Resource Destroy.
+    ///
+    /// @returns The Resource ID.
+    virtual const void* GetResourceId() const = 0;
+
+    /// Copies data directly from CPU memory to an Image.
+    ///
+    /// @param [in]  pRegions    Pointer to an array of regions describing the host memory to copy from and the image
+    ///                          area to copy to.
+    /// @param [in]  regionCount Number of regions to copy.
+    /// @param [in]  useMemcpy   Indicates that the data could be copied more efficiently from host memory to the
+    ///                          image, with the image data already swizzled in host memory.
+    ///
+    /// @returns Success if the copy between the image and memory was successfully performed.  Otherwise, one of the
+    ///          following error codes may be returned:
+    ///          + ErrorInvalidPointer if the memory object bound to image is null.
+    ///          + ErrorGpuMemoryMapFailed if the memory object is busy and cannot be mapped by the OS.
+    ///          + ErrorOutOfMemory if out of system memory.
+    ///          + ErrorInvalidValue if copy between image and memory failed.
+    virtual Result CopyMemoryToImage(
+        const MemoryToImageCopyRegion* pRegions,
+        const uint32                   regionCount,
+        bool                           useMemcpy) const = 0;
+
+    /// Copies data directly from an Image to CPU memory.
+    ///
+    /// @param [in]  pRegions    Pointer to an array of regions describing the image area to copy from and the host
+    ///                          memory to copy to.
+    /// @param [in]  regionCount Number of regions to copy.
+    /// @param [in]  useMemcpy   Indicates that the data could be copied more efficiently from the image to host
+    ///                          memory; the image data will be obtained while retaining the physical layout of the
+    ///                          image.
+    ///
+    /// @returns Success if the copy between the image and memory was successfully performed.  Otherwise, one of the
+    ///          following error codes may be returned:
+    ///          + ErrorInvalidPointer if the memory object bound to image is null.
+    ///          + ErrorGpuMemoryMapFailed if the memory object is busy and cannot be mapped by the OS.
+    ///          + ErrorOutOfMemory if out of system memory.
+    ///          + ErrorInvalidValue if copy between image and memory failed.
+    virtual Result CopyImageToMemory(
+        const ImageToMemoryCopyRegion* pRegions,
+        const uint32                   regionCount,
+        bool                           useMemcpy) const = 0;
+
+    /// Copies data between two images with specified regions.
+    ///
+    /// @param [in]  pDstImage     Pointer to the destination image where the data will be copied.
+    /// @param [in]  pImgRegions   Pointer to an array of regions specifying the area of image to be copied.
+    /// @param [in]  regionCount   Number of regions to copy between the source and destination images.
+    ///
+    /// @returns Success if the copy operation was successfully performed. Otherwise, one of the following error codes
+    ///          may be returned:
+    ///          + ErrorInvalidPointer if any of the input pointers are null.
+    ///          + ErrorGpuMemoryMapFailed if the memory object is busy and cannot be mapped by the OS.
+    ///          + ErrorOutOfMemory if there is insufficient memory to perform the operation.
+    ///          + ErrorInvalidValue if copy between images failed.
+    virtual Result CopyBetweenImages(
+        IImage*                pDstImage,
+        const ImageCopyRegion* pImgRegions,
+        const uint32           regionCount) const = 0;
+
+protected:
+    /// @internal Constructor.  Stores a copy of the create info and initializes the client data pointer to null.
+    ///
+    /// @param [in] createInfo App-specified parameters describing the desired image properties.
+    IImage(const ImageCreateInfo& createInfo) : m_createInfo(createInfo), m_pClientData(nullptr) { }
+
+    /// @internal Destructor.  Prevent use of delete operator on this interface.  Client must destroy objects by
+    /// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
+    /// object on their own.
+    virtual ~IImage() { }
+
+    /// Retained image create info; a const copy taken at construction and returned by GetImageCreateInfo().
+    const ImageCreateInfo m_createInfo;
+
+private:
+    /// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
+    /// and set via SetClientData().
+    /// For non-top-layer objects, this will point to the layer above the current object.
+    void* m_pClientData;
+};
+
+} // Pal
@@ -0,0 +1,508 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palLib.h
+ * @brief Defines the Platform Abstraction Library (PAL) initialization and destruction functions.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palSysMemory.h"
+#include "palDbgPrint.h"
+
+/// Major interface version.  Note that the interface version is distinct from the PAL version itself, which is returned
+/// in @ref Pal::PlatformProperties.
+///
+/// @attention Updates to the major version indicate an interface change that is not backward compatible and may require
+///            action from each client during their next integration.  When determining if a change is backward
+///            compatible, it is assumed that the client will default-initialize all structs.
+///
+/// @ingroup LibInit
+#define PAL_INTERFACE_MAJOR_VERSION 942
+
+/// Minimum major interface version. This is the minimum interface version PAL supports in order to support backward
+/// compatibility. When it is equal to PAL_INTERFACE_MAJOR_VERSION, only the latest interface version is supported.
+///
+/// NOTE(review): the PAL_CLIENT_INTERFACE_MAJOR_VERSION thresholds tested in this header (888, 915, 916, 933) all lie
+/// between this minimum and PAL_INTERFACE_MAJOR_VERSION; presumably a guard can be retired once this minimum is raised
+/// past its threshold - confirm before removing any.
+///
+/// @ingroup LibInit
+#define PAL_MINIMUM_INTERFACE_MAJOR_VERSION 872
+
+/// Minimum supported major interface version for devdriver library. This is the minimum interface version of the
+/// devdriver library that PAL is backwards compatible to.
+///
+/// @ingroup LibInit
+#define PAL_MINIMUM_GPUOPEN_INTERFACE_MAJOR_VERSION 38
+
+/**
+ ***********************************************************************************************************************
+ * @def     PAL_INTERFACE_VERSION
+ * @ingroup LibInit
+ * @brief   Current PAL interface version packed into a 32-bit unsigned integer. The low 16 bits are always zero.
+ *          They used to contain the interface minor version and remain as a placeholder in case we add it back.
+ *
+ * The major version is shifted left by 16 bits, so it occupies the bits above the low 16.  The expansion is built only
+ * from unsuffixed integer literals, which means it is an ordinary integer constant expression (no cast is applied) and
+ * can also be evaluated in preprocessor conditionals.
+ *
+ * @see PAL_INTERFACE_MAJOR_VERSION
+ *
+ * @hideinitializer
+ ***********************************************************************************************************************
+ */
+#define PAL_INTERFACE_VERSION (PAL_INTERFACE_MAJOR_VERSION << 16)
+
+namespace Pal
+{
+
+// Forward declarations
+class      IPlatform;
+
+/// This is a list of GPUs that the NULL OS layer can compile shaders to in offline mode.
+///
+/// Only Default has an explicit value; every other enumerator is numbered implicitly.  Because the pre-Navi entries are
+/// compiled out when PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 888, the numeric value of each later entry depends on the
+/// client interface version (e.g. Navi10 is 10 for older clients and 1 otherwise), so always refer to entries by name.
+///
+/// The trailing comment on each entry appears to give the GPU's GFXIP version (major.minor.stepping).
+enum class NullGpuId : uint32
+{
+    Default = 0,   ///< PAL gives the client an arbitrary supported null device.
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 888
+    Polaris10,     ///< 8.0.3
+    Polaris11,     ///< 8.0.3
+    Polaris12,     ///< 8.0.3
+    Vega10,        ///< 9.0.0
+    Raven,         ///< 9.0.2
+    Vega12,        ///< 9.0.4
+    Vega20,        ///< 9.0.6
+    Raven2,        ///< 9.0.9
+    Renoir,        ///< 9.0.9
+#endif
+    Navi10,        ///< 10.1.0
+    Navi12,        ///< 10.1.1
+    Navi14,        ///< 10.1.2
+    Navi21,        ///< 10.3.0
+    Navi22,        ///< 10.3.1
+    Navi23,        ///< 10.3.2
+    Navi24,        ///< 10.3.4
+    Rembrandt,     ///< 10.3.5
+    Raphael,       ///< 10.3.6
+    Navi31,        ///< 11.0.0
+    Navi32,        ///< 11.0.1
+    Navi33,        ///< 11.0.2
+    Phoenix1,      ///< 11.0.3
+    Phoenix2,      ///< 11.0.3
+    Strix1,        ///< 11.5.0
+    StrixHalo,     ///< 11.5.1
+    Krackan1,      ///< 11.5.2
+    Navi44,        ///< 12.0.0
+    Navi48,        ///< 12.0.1
+    Max,           ///< The maximum count of null devices.
+    All,           ///< If you want to enumerate all null devices.
+};
+
+/// Specifies which graphics IP level (GFXIP) this device has.
+///
+/// The numeric values depend on the client interface version: clients at 888 or newer get consecutive implicit values
+/// (GfxIp10_1 = 1 through GfxIp12 = 5), while older clients get the explicit legacy values listed in the #else branch.
+enum class GfxIpLevel : uint32
+{
+    _None = 0,     ///< @internal The device does not have a GFXIP block, or its level cannot be determined
+
+    // Unfortunately for Linux clients, X.h includes a "#define None 0" macro.  Clients have their choice of either
+    // undefining None before including this header or using _None when dealing with PAL.
+#ifndef None
+    None  = _None, ///< The device does not have a GFXIP block, or its level cannot be determined
+#endif
+
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 888
+    // No explicit values here: each entry is one more than the previous, continuing from _None/None (0).
+    GfxIp10_1,     ///< GFXIP 10.1 (Navi1x)
+    GfxIp10_3,     ///< GFXIP 10.3 (Navi2x, Rembrandt, Raphael, Mendocino)
+    GfxIp11_0,     ///< GFXIP 11.0 (Navi3x, Phoenix)
+    GfxIp11_5,     ///< GFXIP 11.5 (Strix)
+    GfxIp12,       ///< GFXIP 12.0 (Navi4x)
+#else // PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 888
+    // Legacy numbering for older clients.  The explicit values are not contiguous.
+    GfxIp6    = 0x1,  ///< GFXIP 6
+    GfxIp7    = 0x2,  ///< GFXIP 7
+    GfxIp8    = 0x3,  ///< GFXIP 8
+    GfxIp8_1  = 0x4,  ///< GFXIP 8.1
+    GfxIp9    = 0x5,  ///< GFXIP 9
+    GfxIp10_1 = 0x7,  ///< GFXIP 10.1 (Navi1x)
+    GfxIp10_3 = 0x9,  ///< GFXIP 10.3 (Navi2x, Rembrandt, Raphael, Mendocino)
+    GfxIp11_0 = 0xC,  ///< GFXIP 11.0 (Navi3x, Phoenix)
+    GfxIp11_5 = 0xF,  ///< GFXIP 11.5 (Strix)
+    GfxIp12   = 0x11, ///< GFXIP 12.0 (Navi4x)
+#endif
+};
+
+/// Specifies the hardware revision. Some AMD tools hard-code these values so we cannot change them. New ASICs should
+/// be added at the end of the list and be given the next highest value.
+///
+/// As a consequence the entries are ordered by assigned value rather than by the version shown in the trailing
+/// comments (which appears to be the GFXIP version), and the numbering contains unassigned values.
+/// NOTE(review): presumably those holes belong to ASICs not exposed by this header - do not reuse them without
+/// confirming.
+enum class AsicRevision : uint32
+{
+    Unknown          = 0x00,
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 888
+    // Pre-Navi ASICs are only visible to clients built against interface versions older than 888.
+    Tahiti           = 0x01,
+    Pitcairn         = 0x02,
+    Capeverde        = 0x03,
+    Oland            = 0x04,
+    Hainan           = 0x05,
+    Bonaire          = 0x06,
+    Hawaii           = 0x07,
+    HawaiiPro        = 0x08,
+    Kalindi          = 0x0A,
+    Godavari         = 0x0B,
+    Spectre          = 0x0C,
+    Spooky           = 0x0D,
+    Carrizo          = 0x0E,
+    Bristol          = 0x0F,
+    Stoney           = 0x10,
+    Iceland          = 0x11,
+    Tonga            = 0x12,
+    TongaPro         = Tonga, ///< Alias: shares Tonga's value (0x12).
+    Fiji             = 0x13,
+    Polaris10        = 0x14,
+    Polaris11        = 0x15,
+    Polaris12        = 0x16,
+    Vega10           = 0x18,
+    Vega12           = 0x19,
+    Vega20           = 0x1A,
+    Raven            = 0x1B,
+    Raven2           = 0x1C,
+    Renoir           = 0x1D,
+#endif
+    // Explicit values keep the Navi-and-later entries identical regardless of the client interface version.
+    Navi10           = 0x1F, ///< 10.1.0
+    Navi12           = 0x21, ///< 10.1.1
+    Navi14           = 0x23, ///< 10.1.2
+    Navi21           = 0x24, ///< 10.3.0
+    Navi22           = 0x25, ///< 10.3.1
+    Navi23           = 0x26, ///< 10.3.2
+    Navi24           = 0x27, ///< 10.3.4
+    Navi31           = 0x2C, ///< 11.0.0
+    Navi32           = 0x2D, ///< 11.0.1
+    Navi33           = 0x2E, ///< 11.0.2
+    Rembrandt        = 0x2F, ///< 10.3.5
+    Strix1           = 0x33, ///< 11.5.0
+    Raphael          = 0x34, ///< 10.3.6
+    Phoenix1         = 0x35, ///< 11.0.3
+    Phoenix2         = 0x38, ///< 11.0.3
+    HawkPoint1       = 0x39, ///< 11.0.3
+    HawkPoint2       = 0x3A, ///< 11.0.3
+    Krackan1         = 0x3B, ///< 11.5.2
+    StrixHalo        = 0x3C, ///< 11.5.1
+    Navi44           = 0x3D, ///< 12.0.0
+    Navi48           = 0x3E, ///< 12.0.1
+};
+
+/// Maps a null GPU ID to its associated text name.  This is the element type of the array filled in by
+/// EnumerateNullDevices().
+struct NullGpuInfo
+{
+    NullGpuId   nullGpuId;  ///< ID of an ASIC that PAL supports for override purposes
+    const char* pGpuName;   ///< Text name of the ASIC specified by nullGpuId
+};
+
+/// Various IDs and info associated with a particular GPU.  This is the output structure of the GetNullGpuInfoFor*()
+/// lookup functions declared in this header.
+struct GpuInfo
+{
+    AsicRevision asicRev;     ///< PAL specific ASIC revision identifier.
+    NullGpuId    nullId;      ///< PAL specific GPU ID supported by the NULL OS layer.
+    GfxIpLevel   gfxIpLevel;  ///< PAL specific identifier for the device's graphics IP level (GFXIP).
+    uint32       familyId;    ///< Hardware family ID. Driver-defined identifier for a particular family of devices.
+    uint32       eRevId;      ///< GPU emulation/internal revision ID.
+    uint32       revisionId;  ///< GPU revision. HW-specific value differentiating between different SKUs or revisions.
+    uint32       gfxEngineId; ///< Coarse-grain GFX engine ID (R800, SI, etc.).
+    uint32       deviceId;    ///< PCI device ID (e.g., Hawaii XT = 0x67B0).
+    const char*  pGpuName;    ///< ASIC name and AMDGPU target name (e.g., "NAVI31:gfx1100").
+                              ///  NOTE(review): ownership/lifetime of this string is not stated here.
+};
+
+// Two mutually exclusive definitions of ClientApi follow, selected by the client interface version.  The numeric values
+// of OpenCl and Hip differ between them (7 and 8 in the legacy enum, implicitly 0 and 1 in the newer one), so the raw
+// value is only meaningful together with the interface version it was produced under.
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 915
+/// PAL client APIs.
+enum class ClientApi : uint32
+{
+    Pal     = 0,
+    Dx9     = 1,
+    Dx12    = 3,
+    Vulkan  = 4,
+    OpenCl  = 7,
+    Hip     = 8,
+    Amf     = 9,
+};
+#else
+/// The client UMD must identify its API using this enum. Some UMD builds may implement multiple APIs so they must
+/// specify which API they're implementing at runtime. Note that the PAL_CLIENT macros are the preferred way to
+/// implement client-specific behavior; runtime ClientApi checks should only be used when necessary.
+enum class ClientApi : uint32
+{
+    OpenCl,
+    Hip
+};
+#endif
+
+/// Specifies properties for @ref IPlatform creation. Input structure to Pal::CreatePlatform().
+///
+/// As stated for PAL_INTERFACE_MAJOR_VERSION, PAL assumes clients default-initialize this struct before filling it in.
+struct PlatformCreateInfo
+{
+    const Util::AllocCallbacks*  pAllocCb;      ///< Optional client-provided callbacks. If non-null, PAL will call the
+                                                ///  specified callbacks to allocate and free all internal system
+                                                ///  memory. If null, PAL will manage memory on its own through the C
+                                                ///  runtime library.
+    const Util::LogCallbackInfo* pLogInfo;      ///< Optional client-provided callback info.  If non-null, Pal will
+                                                ///  call the callback to pass debug prints to the client.
+
+    const char*                  pSettingsPath; ///< A null-terminated string describing the path to where settings are
+                                                ///  located on the system. For example, on Windows, this will refer to
+                                                ///  which UMD subkey to look in under a device's key. For Linux, this
+                                                ///  is the path to the settings file.
+
+    union
+    {
+        struct
+        {
+            uint32 disableGpuTimeout              :  1; ///< Disables GPU timeout detection (Windows only)
+            uint32 force32BitVaSpace              :  1; ///< Forces 32bit VA space for the flat address with 32bit ISA
+            uint32 createNullDevice               :  1; ///< Set to create a null device; see "nullGpuId" below for the
+                                                        ///  ID of the GPU the created device will be based on.  Null
+                                                        ///  devices operate in IFH mode; useful for off-line shader
+                                                        ///  compilations.
+            uint32 enableSvmMode                  :  1; ///< Enable SVM mode. When this bit is set, PAL will reserve
+                                                        ///  a CPU VA range of size "maxSvmSize", and allow the client
+                                                        ///  to create GPU or pinned memory for use with SVM.
+                                                        ///  For details of SVM, please refer to CreateSvmGpuMemory
+            uint32 requestShadowDescriptorVaRange :  1; ///< Requests that PAL provides support for the client to use
+                                                        ///  the @ref VaRange::ShadowDescriptorTable virtual-address
+                                                        ///  range. Some GPU's may not be capable of supporting this,
+                                                        ///  even when requested by the client.
+            uint32 disableInternalResidencyOpts   :  1; ///< Disables residency optimizations for internal GPU memory
+                                                        ///  allocations.  Some clients may wish to have them turned
+                                                        ///  off to save on system resources.
+            uint32 supportRgpTraces               :  1; ///< Indicates that the client supports RGP tracing. PAL will
+                                                        ///  use this flag and the hardware support flag to setup the
+                                                        ///  DevDriver RgpServer.
+            uint32 dontOpenPrimaryNode            :  1; ///< No primary node is needed (Linux only)
+            uint32 disableDevDriver               :  1; ///< If no DevDriverMgr should be created with this Platform.
+            uint32 reserved                       : 23; ///< Reserved for future use.  Together with the nine flags
+                                                        ///  above this fills all 32 bits of u32All.
+        };
+        uint32 u32All;                                  ///< Flags packed as 32-bit uint.
+    } flags;                                            ///< Platform-wide creation flags.
+
+    ClientApi clientApiId; ///< Client API ID.
+    NullGpuId nullGpuId;   ///< ID for the null device. Ignored unless the above flags.createNullDevice bit is set.
+    uint16    apiMajorVer; ///< Major API version number to be used by RGP. Should be set by client based on their
+                           ///  contract with RGP.
+    uint16    apiMinorVer; ///< Minor API version number to be used by RGP. Should be set by client based on their
+                           ///  contract with RGP.
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 916
+    uint32    instrApiVer; ///< Instrumentation specification version for API-specific SQTT instrumentation fields.
+                           ///  Should be set by client based on the SQTT instrumentation spec version being targeted.
+#endif
+    gpusize   maxSvmSize;  ///< Maximum amount of virtual address space that will be reserved for SVM
+};
+
+/**
+ ***********************************************************************************************************************
+ * @brief Determines the amount of system memory required for a Platform object.
+ *
+ * This function must be called before any other interaction with PAL. An allocation of this amount of memory must be
+ * provided in the pPlacementAddr parameter of Pal::CreatePlatform, which constructs the platform object at that
+ * location.
+ *
+ * @ingroup LibInit
+ *
+ * @returns Size, in bytes, of system memory required for an IPlatform object.
+ ***********************************************************************************************************************
+ */
+size_t PAL_STDCALL GetPlatformSize();
+
+/**
+ ***********************************************************************************************************************
+ * @brief Creates the Platform Abstraction Library.
+ *
+ * On execution of CreatePlatform(), PAL will establish a connection for OS and KMD communication, install the specified
+ * system memory allocation callbacks, and initialize any global internal services.  Finally, the client will be
+ * returned an object pointer to the instantiated platform object, which is used to query the capabilities of the
+ * system.
+ *
+ * @ingroup LibInit
+ *
+ * @param [in]  createInfo     Parameters indicating the client requirements for the platform such as allocation
+ *                             callbacks or the settings path.
+ * @param [in]  pPlacementAddr Pointer to the location where PAL should construct this object.  There must be as
+ *                             much size available here as reported by calling GetPlatformSize().
+ * @param [out] ppPlatform     Platform object pointer to the instantiated platform. Must not be null.
+ *
+ * @returns Success if the initialization completed successfully.  Otherwise, one of the following error codes may be
+ *          returned:
+ *          + ErrorInvalidPointer will be returned if:
+ *              - ppPlatform is null.
+ *              - pPlacementAddr is null.
+ *              - createInfo.pAllocCb is non-null but pfnAlloc and/or pfnFree is null.
+ *              - createInfo.pSettingsPath is null.
+ *          + ErrorInitializationFailed will be returned if PAL is unable to open a connection to the OS.
+ ***********************************************************************************************************************
+ */
+Result PAL_STDCALL CreatePlatform(
+    const PlatformCreateInfo&   createInfo,
+    void*                       pPlacementAddr,
+    IPlatform**                 ppPlatform);
+
+/**
+ ***********************************************************************************************************************
+ * @brief Provides an association of NULL devices and their associated text name.  NULL devices operate in IFH mode
+ *        and are primarily intended for off-line shader compilation mode.  The text name is provided for end-user
+ *        identification of the GPU device being created.
+ *
+ * @param [in,out] pNullDeviceCount   On input, this is the size of the "pNullDevices" array.  On output, this
+ *                                    reflects the number of valid entries in the "pNullDevices" array.
+ * @param [out]    pNullDevices       Includes information on the valid NULL devices supported by the system.  If
+ *                                    this is NULL, then pNullDeviceCount reflects the maximum possible size of the
+ *                                    null-devices array.
+ *
+ * @returns Success if the enumeration completed successfully.  Otherwise, one of the following error codes may be
+ *          returned:
+ *          + ErrorInvalidPointer will be returned if pNullDeviceCount is NULL.  A NULL pNullDevices is the documented
+ *            way to query the count (see above).  NOTE(review): this line previously read "if either input is NULL",
+ *            which contradicted the pNullDevices description - confirm against the implementation.
+ ***********************************************************************************************************************
+ */
+Result PAL_STDCALL EnumerateNullDevices(
+    uint32*       pNullDeviceCount,
+    NullGpuInfo*  pNullDevices);
+
+/**
+ ***********************************************************************************************************************
+ * @brief Provides the NULL device GpuInfo data for the specified NullGpuId.
+ *
+ * Clients built against interface versions older than 933 can also reach this function through the inline alias
+ * GetGpuInfoForNullGpuId() defined below.
+ *
+ * @param [in]  nullGpuId Null GPU ID to lookup.
+ * @param [out] pGpuInfo  GpuInfo data on successful lookup. Must not be null.
+ *
+ * @returns Success if the lookup completed successfully. Otherwise, one of the following error codes may be returned:
+ *          + ErrorInvalidPointer will be returned if pGpuInfo is NULL.
+ *          + NotFound will be returned if the Null GPU ID was not found.
+ ***********************************************************************************************************************
+ */
+Result PAL_STDCALL GetNullGpuInfoForNullGpuId(
+    NullGpuId nullGpuId,
+    GpuInfo*  pGpuInfo);
+
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 933
+/// Legacy alias of GetNullGpuInfoForNullGpuId(), only compiled for clients whose interface version is older than 933.
+/// Passes both arguments through untouched and hands back whatever the renamed function returns.
+inline Result PAL_STDCALL GetGpuInfoForNullGpuId(
+    NullGpuId nullGpuId,
+    GpuInfo*  pGpuInfo)
+{
+    const Result lookupResult = GetNullGpuInfoForNullGpuId(nullGpuId, pGpuInfo);
+
+    return lookupResult;
+}
+#endif
+
+/**
+ ***********************************************************************************************************************
+ * @brief Provides the NULL device GpuInfo data for the specified GPU name string.
+ *
+ * Clients built against interface versions older than 933 can also reach this function through the inline alias
+ * GetGpuInfoForName() defined below.
+ *
+ * @param [in]  pGpuName Name string of the GPU to lookup (e.g., "NAVI10").
+ * @param [out] pGpuInfo GpuInfo data on successful lookup. Must not be null.
+ *
+ * @returns Success if the lookup completed successfully. Otherwise, one of the following error codes may be returned:
+ *          + ErrorInvalidPointer will be returned if pGpuName or pGpuInfo are NULL.
+ *          + NotFound will be returned if the Name string was not found.
+ ***********************************************************************************************************************
+ */
+Result PAL_STDCALL GetNullGpuInfoForName(
+    const char* pGpuName,
+    GpuInfo*    pGpuInfo);
+
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 933
+/// Legacy alias of GetNullGpuInfoForName(), only compiled for clients whose interface version is older than 933.
+/// Passes both arguments through untouched and hands back whatever the renamed function returns.
+inline Result PAL_STDCALL GetGpuInfoForName(
+    const char* pGpuName,
+    GpuInfo*    pGpuInfo)
+{
+    const Result lookupResult = GetNullGpuInfoForName(pGpuName, pGpuInfo);
+
+    return lookupResult;
+}
+#endif
+
+/**
+ ***********************************************************************************************************************
+ * @brief Provides the NULL device GpuInfo data for the specified hardware revision.
+ *
+ * Clients built against interface versions older than 933 can also reach this function through the inline alias
+ * GetGpuInfoForAsicRevision() defined below.
+ *
+ * @param [in]  asicRevision Hardware revision to lookup.
+ * @param [out] pGpuInfo     GpuInfo data on successful lookup. Must not be null.
+ *
+ * @returns Success if the lookup completed successfully. Otherwise, one of the following error codes may be returned:
+ *          + ErrorInvalidPointer will be returned if pGpuInfo is NULL.
+ *          + NotFound will be returned if the hardware revision was not found.
+ ***********************************************************************************************************************
+ */
+Result PAL_STDCALL GetNullGpuInfoForAsicRevision(
+    AsicRevision asicRevision,
+    GpuInfo*     pGpuInfo);
+
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 933
+/// Legacy alias of GetNullGpuInfoForAsicRevision(), only compiled for clients whose interface version is older than
+/// 933.  Passes both arguments through untouched and hands back whatever the renamed function returns.
+inline Result PAL_STDCALL GetGpuInfoForAsicRevision(
+    AsicRevision asicRevision,
+    GpuInfo*     pGpuInfo)
+{
+    const Result lookupResult = GetNullGpuInfoForAsicRevision(asicRevision, pGpuInfo);
+
+    return lookupResult;
+}
+#endif
+
+/**
+ ***********************************************************************************************************************
+ * @defgroup LibInit Library Initialization and Destruction
+ *
+ * Before initializing PAL, it is important to make sure that the interface version is consistent with the client's
+ * expectations.  The client should check @ref PAL_INTERFACE_MAJOR_VERSION to ensure the major interface version has not
+ * changed since the last PAL integration.  Ideally, this should be performed with a compile-time assert comparing
+ * @ref PAL_INTERFACE_MAJOR_VERSION against a client-maintained expected major version.   Minor interface version
+ * changes should be backward compatible, and do not require a client change to maintain previous levels of
+ * functionality.
+ *
+ * On startup, the client's first call to PAL must be GetPlatformSize() followed by CreatePlatform().  This function
+ * gives an opportunity for PAL to perform any necessary platform-wide initialization such as opening a connection for
+ * communication with the operating system and kernel mode driver or initializing tracking facilities for system memory
+ * management.  CreatePlatform() returns a created IPlatform object for future interaction with PAL.
+ *
+ * PAL optionally allows the client to specify a set of memory management callbacks during initialization.  If
+ * specified, PAL will not allocate or free any memory directly from the runtime, instead calling back to the client.
+ * The client (or application, if the client forwards on the requests) may be able to implement a more efficient
+ * allocation scheme.
+ *
+ * After a successful call to CreatePlatform(), the client should call @ref IPlatform::EnumerateDevices() in order to
+ * get a list of supported devices attached to the system.  This function returns an array of @ref IDevice objects
+ * which are used by the client to query properties of the devices and eventually execute work on those devices.
+ * IPlatform::EnumerateDevices() is not available to util-only clients (PAL_BUILD_CORE=0).
+ *
+ * The client may re-enumerate devices at any time by calling IPlatform::EnumerateDevices().  The client must make sure
+ * there is no active work on any device and that all objects associated with those devices have been destroyed.
+ * IPlatform::EnumerateDevices() will destroy all previously reported @ref IDevice objects and return a fresh set.
+ * The client is required to re-enumerate devices when it receives an ErrorDeviceLost error from PAL.
+ *
+ * After enumerating devices, either during start-up or when recovering from an ErrorDeviceLost error, the client must
+ * setup and finalize PAL's per-device settings.  See IDevice::GetPublicSettings(), IDevice::SetDxRuntimeData(),
+ * IDevice::CommitSettingsAndInit(), and IDevice::Finalize() for details.
+ *
+ * After enumerating devices and finalizing them, the client may query the set of available screens. This is done by
+ * calling the @ref IPlatform::GetScreens() function.  Note that screens are not available for DX clients.  Each screen
+ * is accessible by zero or more of the enumerated devices. Most screens are accessible from a "main" device as well as
+ * several other devices which can perform cross-display Flip presents to the screen. In some configurations, screens
+ * may not be directly accessible to any of PAL's devices, in which case fullscreen presents are unavailable to that
+ * screen. (This typically only occurs in PowerExpress configurations.) Note that when IPlatform::EnumerateDevices() is
+ * called, any enumerated @ref IScreen objects which existed prior to that call are invalidated for the specified
+ * platform and IPlatform::GetScreens() needs to be called again to get the updated list of screens.
+ *
+ * On shutdown, the client should call @ref IPlatform::Destroy() to allow PAL to cleanup and free any remaining
+ * platform-wide resources.  The client must ensure this call is not made until all other created objects are idle and
+ * destroyed (if destroyable).
+ *
+ * When the client is asked to destroy a device it may call IDevice::Cleanup() to explicitly clean up the device. Some
+ * clients will find it necessary to call Cleanup(), for example, if their devices have OS handles that become invalid.
+ * Note that Cleanup() doesn't destroy the device; it will return to its initial state, as if it was newly enumerated.
+ ***********************************************************************************************************************
+ */
+
+} // Pal
@@ -0,0 +1,187 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palMsaaState.h
+ * @brief Defines the Platform Abstraction Library (PAL) IMsaaState interface and related types.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palDestroyable.h"
+
+namespace Pal
+{
+
+/// Specifies conservative rasterization mode
+enum class ConservativeRasterizationMode : uint8
+{
+    Overestimate    = 0x0,  ///< Fragments will be generated if the primitive area covers any portion of the pixel.
+    Underestimate   = 0x1,  ///< Fragments will be generated if all of the pixel is covered by the primitive.
+    Count                   ///< Number of modes listed above (implicitly 2).
+};
+
+/// Maximum supported number of MSAA color samples.
+constexpr uint32 MaxMsaaColorSamples = 16;
+
+/// Maximum supported number of MSAA depth samples.
+constexpr uint32 MaxMsaaDepthSamples = 8;
+
+/// Maximum supported number of MSAA fragments.
+constexpr uint32 MaxMsaaFragments = 8;
+
+/// Sampling pattern grid size. This is a quad of pixels, i.e. 2x2 grid of pixels.
+constexpr Extent2d MaxGridSize = { 2, 2 };
+
+/// The positions are rounded to 1/Pow2(SubPixelBits), i.e. to 1/16 with the current value of 4.
+constexpr uint32 SubPixelBits = 4;
+
+/// Each pixel is subdivided into Pow2(SubPixelBits) x Pow2(SubPixelBits) grid of possible sample locations.
+/// The literal 16 below equals Pow2(SubPixelBits) for SubPixelBits == 4; it is not derived from SubPixelBits, so the
+/// two must be updated together.
+constexpr Extent2d SubPixelGridSize = { 16, 16 };
+
+/// A 2D sample position whose components each lie in [-8/16, 7/16].
+struct SampleLocation
+{
+    int8 x; ///< Offset along X.
+    int8 y; ///< Offset along Y.
+
+    /// Implicit conversion to Offset2d; the signed 8-bit components are sign-extended in the process.
+    operator Offset2d() const
+    {
+        Offset2d extended = { x, y };
+
+        return extended;
+    }
+};
+
+/// Specifies a custom multisample pattern for a pixel quad.  Holds one array of sample locations for each of the four
+/// pixels of the quad; every array has MaxMsaaRasterizerSamples entries (that constant is declared outside this
+/// file's visible portion).
+struct MsaaQuadSamplePattern
+{
+    SampleLocation topLeft[MaxMsaaRasterizerSamples];       ///< Sample locations for TL pixel of quad.
+    SampleLocation topRight[MaxMsaaRasterizerSamples];      ///< Sample locations for TR pixel of quad.
+    SampleLocation bottomLeft[MaxMsaaRasterizerSamples];    ///< Sample locations for BL pixel of quad.
+    SampleLocation bottomRight[MaxMsaaRasterizerSamples];   ///< Sample locations for BR pixel of quad.
+};
+
+/// Specifies properties for creation of an @ref IMsaaState object.  Input structure to IDevice::CreateMsaaState().
+struct MsaaStateCreateInfo
+{
+    uint8  coverageSamples;         ///< Number of rasterizer samples. Must be greater than or equal to all sample
+                                    ///  rates in the pipeline. Valid values are 1, 2, 4, 8, and 16.
+    uint8  exposedSamples;          ///< Number of samples exposed in the pixel shader coverage mask.  Must be less
+                                    ///  than or equal to coverageSamples. Valid values are 1, 2, 4, and 8.
+    uint8  pixelShaderSamples;      ///< Controls the pixel shader execution rate. Must be less than or equal to
+                                    ///  coverageSamples. Valid values are 1, 2, 4, and 8. Note that a value
+                                    ///  greater than 1 doesn't mean sample rate shading is enabled. Sample rate
+                                    ///  shading is enabled by either @ref forceSampleRateShading or the pixel shader.
+    uint8  depthStencilSamples;     ///< Number of samples in the bound depth target. Must be less than or equal to
+                                    ///  coverageSamples. Valid values are 1, 2, 4, and 8.
+    uint8  shaderExportMaskSamples; ///< Number of samples to use in the shader export mask. Should match the number
+                                    ///  of color target fragments clamped to
+                                    ///  @ref DeviceProperties imageProperties.maxMsaaFragments.
+    uint8  sampleClusters;          ///< Number of sample clusters to control over-rasterization (all samples in a
+                                    ///  cluster are rasterized if any are hit). Must be less than or equal to
+                                    ///  coverageSamples. Valid values are 1, 2, 4, and 8.
+    uint8  alphaToCoverageSamples;  ///< How many samples of quality to generate with alpha-to-coverage. Must be
+                                    ///  less than or equal to coverageSamples. Valid values are 1, 2, 4, 8, and 16.
+    uint8  occlusionQuerySamples;   ///< Controls the number of samples to use for occlusion queries.
+                                    ///  This value must never exceed the MSAA rate.
+    uint16 sampleMask;              ///< Bitmask of which color target and depth/stencil samples should be updated.
+                                    ///  The lowest bit corresponds to sample 0.
+
+    /// Selects overestimate or underestimate conservative rasterization mode. Used only if
+    /// @ref MsaaStateCreateInfo::flags::enableConservativeRasterization is set to true.
+    ConservativeRasterizationMode conservativeRasterizationMode;
+
+    union
+    {
+        struct
+        {
+            uint8 enableConservativeRasterization : 1; ///< Set to true to enable conservative rasterization.
+            uint8 enable1xMsaaSampleLocations     : 1; ///< Set to true to enable 1xMSAA quad sample pattern.
+            uint8 disableAlphaToCoverageDither    : 1; ///< Disables coverage dithering.
+            uint8 enableLineStipple               : 1; ///< Set to true to enable line stippling.
+            uint8 forceSampleRateShading          : 1; ///< Sample rate shading can be enabled by either the pixel
+                                                       ///  shader, or forced here with forceSampleRateShading = 1.
+                                                       ///  Value 0 means sample rate shading is decided by pixel shader
+                                                       ///  and value 1 means sample rate shading is forced enabled.
+                                                       ///  This bit is for OpenGL glMinSampleShading, where sample rate
+                                                       ///  shading can be enabled by glEnable(GL_SAMPLE_SHADING)
+                                                       ///  instead of by the pixel shader.
+            uint8 reserved                        : 3; ///< Reserved for future use.
+        };
+        uint8 u8All;                                   ///< Flags packed as 8-bit uint.
+    } flags;
+};
+
+/**
+ ***********************************************************************************************************************
+ * @interface IMsaaState
+ * @brief     Dynamic state object controlling fixed function MSAA state.
+ *
+ * Configures sample counts of various portions of the pipeline, specifies sample positions, etc.  The full range of
+ * EQAA hardware features is exposed.
+ *
+ * @see IDevice::CreateMsaaState
+ ***********************************************************************************************************************
+ */
+class IMsaaState : public IDestroyable
+{
+public:
+
+    /// Returns the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @returns Pointer to client data (nullptr until SetClientData() has been called).
+    void* GetClientData() const
+    {
+        return m_pClientData;
+    }
+
+    /// Sets the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @param  [in]    pClientData     A pointer to arbitrary client data.
+    void SetClientData(
+        void* pClientData)
+    {
+        m_pClientData = pClientData;
+    }
+
+protected:
+    /// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
+    /// calling the proper create method.
+    IMsaaState() : m_pClientData(nullptr) {}
+
+    /// @internal Destructor.  Prevent use of delete operator on this interface.  Client must destroy objects by
+    /// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
+    /// object on their own.
+    virtual ~IMsaaState() { }
+
+private:
+    /// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
+    /// and set via SetClientData().
+    /// For non-top-layer objects, this will point to the layer above the current object.
+    void* m_pClientData;
+};
+
+} // Pal
@@ -0,0 +1,619 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palPerfExperiment.h
+ * @brief Defines the Platform Abstraction Library (PAL) IPerfExperiment interface and related types.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palGpuMemoryBindable.h"
+
+namespace Pal
+{
+
+/// Specifies a particular block on the GPU to gather counters for.
+enum class GpuBlock : uint32
+{
+    Cpf      = 0x0,
+    Ia       = 0x1,
+    Vgt      = 0x2,
+    Pa       = 0x3,
+    Sc       = 0x4,
+    Spi      = 0x5,
+    Sq       = 0x6,
+    Sx       = 0x7,
+    Ta       = 0x8,
+    Td       = 0x9,
+    Tcp      = 0xA,
+    Tcc      = 0xB,
+    Tca      = 0xC,
+    Db       = 0xD,
+    Cb       = 0xE,
+    Gds      = 0xF,
+    Srbm     = 0x10,
+    Grbm     = 0x11,
+    GrbmSe   = 0x12,
+    Rlc      = 0x13,
+    Dma      = 0x14,
+    Mc       = 0x15,
+    Cpg      = 0x16,
+    Cpc      = 0x17,
+    Wd       = 0x18,
+    Tcs      = 0x19,
+    Atc      = 0x1A,
+    AtcL2    = 0x1B,
+    McVmL2   = 0x1C,
+    Ea       = 0x1D,
+    Rpb      = 0x1E,
+    Rmi      = 0x1F,
+    Umcch    = 0x20,
+    Ge       = 0x21,
+    Gl1a     = 0x22,
+    Gl1c     = 0x23,
+    Gl1cg    = 0x24,
+    Gl2a     = 0x25, // TCA is used in Gfx9, and changed to GL2A in Gfx10
+    Gl2c     = 0x26, // TCC is used in Gfx9, and changed to GL2C in Gfx10
+    Cha      = 0x27,
+    Chc      = 0x28,
+    Chcg     = 0x29,
+    Gus      = 0x2A,
+    Gcr      = 0x2B,
+    Ph       = 0x2C,
+    UtcL1    = 0x2D,
+    Ge1      = Ge,   // Alias of Ge: both names share the value 0x21.
+    GeDist   = 0x2E,
+    GeSe     = 0x2F,
+    DfMall   = 0x30, // The DF subblocks have unique instances and event IDs but they all share the DF's perf counters.
+    SqWgp    = 0x31, // SQ counters that can be sampled at WGP granularity.
+    Pc       = 0x32,
+    Gl1xa    = 0x33,
+    Gl1xc    = 0x34,
+    Wgs      = 0x35,
+    EaCpwd   = 0x36,
+    EaSe     = 0x37,
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 926
+    RlcUser  = 0x38, // Name of block 0x38 for client interface versions older than 926.
+#else
+    RlcLocal = 0x38, // Name of block 0x38 from client interface version 926 onwards.
+#endif
+    Count            // One past the last block value above (0x39).
+};
+
+/// Distinguishes between global and streaming performance monitor (SPM) counters.
+enum class PerfCounterType : uint32
+{
+    Global = 0x0, ///< Represents the traditional summary perf counters.
+    Spm    = 0x1, ///< Represents streaming performance counters.
+    Spm32  = 0x2, ///< Represents 32-bit streaming performance counters.
+    Count         ///< Number of counter types above (0x3).
+};
+
+/// Reports the type of data the hardware writes for a particular counter.
+enum class PerfCounterDataType : uint32
+{
+    Uint32 = 0x0, ///< The counter data is written as a 32-bit uint.
+    Uint64 = 0x1, ///< The counter data is written as a 64-bit uint.
+    Count         ///< Number of data types above (0x2).
+};
+
+/// Distinguishes between normal thread traces and streaming performance monitor (SPM) traces.
+enum class PerfTraceType : uint32
+{
+    ThreadTrace = 0x0, ///< A normal thread trace.
+    SpmTrace    = 0x1, ///< A streaming performance monitor (SPM) trace.
+    Count              ///< Number of trace types above (0x2).
+};
+
+/// Mask values ORed together to choose which shader stages a performance experiment should sample.
+enum PerfExperimentShaderFlags
+{
+    PerfShaderMaskPs  = 0x01, ///< Sample the PS stage.
+    PerfShaderMaskVs  = 0x02, ///< Sample the VS stage.
+    PerfShaderMaskGs  = 0x04, ///< Sample the GS stage.
+    PerfShaderMaskEs  = 0x08, ///< Sample the ES stage.
+    PerfShaderMaskHs  = 0x10, ///< Sample the HS stage.
+    PerfShaderMaskLs  = 0x20, ///< Sample the LS stage.
+    PerfShaderMaskCs  = 0x40, ///< Sample the CS stage.
+    PerfShaderMaskAll = 0x7f, ///< All seven stage bits above ORed together.
+};
+
+/// Selects one of the generic performance trace markers, which the client can use to track data of its own choosing.
+enum class PerfTraceMarkerType : uint32
+{
+    SqttA = 0x0,
+    SqttB = 0x1,
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 874
+    A = SqttA,    ///< Alias of SqttA, only declared for client interface versions older than 874.
+    B = SqttB,    ///< Alias of SqttB, only declared for client interface versions older than 874.
+#endif
+    SpmA  = 0x2,
+    SpmB  = 0x3,
+    SpmC  = 0x4,
+    SpmD  = 0x5,
+    Count         ///< One past the last marker value above (0x6).
+};
+
+/// Specifies the features available in a device for supporting performance measurements.
+union PerfExperimentDeviceFeatureFlags
+{
+    struct
+    {
+        uint32 counters          :  1; ///< Device supports performance counters.
+        uint32 threadTrace       :  1; ///< Device supports thread traces.
+        uint32 spmTrace          :  1; ///< Device supports streaming perf monitor traces.
+        uint32 dfSpmTrace        :  1; ///< Device supports streaming DF perf monitor traces.
+        uint32 supportPs1Events  :  1; ///< The thread trace HW of this Device is capable of producing event tokens
+                                       ///  from the second PS backend of SC.
+        uint32 sqttBadScPackerId :  1; ///< Hardware is affected by a bug causing the packer ID specified in new PS
+                                       ///  waves to be incorrect in SQ thread trace data.
+        uint32 reserved          : 26; ///< Reserved for future use.
+    };
+    uint32     u32All;                 ///< Feature flags packed as 32-bit uint.
+};
+
+/// Specifies properties for a perf counter being added to a perf experiment.  Input structure to
+/// IPerfExperiment::AddCounter().
+///
+/// A note for GpuBlock::SqWgp
+/// Client of palPerfExperiment may configure counters of GpuBlock::SqWgp based on a per-wgp granularity
+/// only if the following are disabled: GFXOFF, virtualization/SRIOV, VDDGFX (power down features), clock gating (CGCG)
+/// and power gating. PAL exposes this feature to clients.
+/// If any of the conditions above cannot be met, it's the client's job to set all WGPs in the same SE to the same
+/// perf counter programming. In this case, GpuBlock::SqWgp's perf counter works on a per-SE granularity.
+/// Strictly speaking, it's not true that the counters work on a per-SE granularity when those power features
+/// are enabled. It's all still per-WGP in HW, we just can't support different counter configs within the same SE.
+/// The counter data is still reported per WGP (not aggregated for the whole SE).
+///
+struct PerfCounterInfo
+{
+    PerfCounterType              counterType; ///< Type of counter to add.
+    GpuBlock                     block;       ///< Which block to reference.
+    uint32                       instance;    ///< Instance of that block in the device.
+    uint32                       eventId;     ///< Which event ID to track.
+
+    // Some blocks have additional per-counter controls. They must be properly programmed when adding counters for
+    // the relevant blocks. It's recommended to zero them out when not in use.
+    union
+    {
+        struct
+        {
+            uint32 eventQualifier;   ///< The DF counters have an event-specific qualifier bitfield.
+        } df;
+
+        struct
+        {
+            uint16 eventThreshold;   ///< Threshold value for those UMC counters having event-specific threshold.
+            uint8  eventThresholdEn; ///< Threshold enable (0 for disabled, 1 for <threshold, 2 for >threshold).
+            uint8  rdWrMask;         ///< Read/Write mask select (1 for Read, 2 for Write).
+        } umc;
+
+        uint32 rs64Cntl; ///< CP blocks CPG and CPC have events that can be further filtered for processor events.
+
+        uint32 u32All; ///< Union value for copying; must be increased in size if any union element exceeds 32 bits.
+    } subConfig;
+};
+
+/// Specifies properties for setting up a streaming performance counter trace. Input structure to
+/// IPerfExperiment::AddSpmTrace() and IPerfExperiment::AddDfSpmTrace().
+struct SpmTraceCreateInfo
+{
+    uint32                 spmInterval;       ///< Interval between each sample in terms of GPU sclks. Minimum of 32.
+    gpusize                ringSize;          ///< Suggested size of the SPM output ring buffer in bytes. PAL may use
+                                              ///  a smaller ring in practice but it cannot exceed this size.
+    uint32                 numPerfCounters;   ///< Number of performance counters to be collected in this trace.
+    const PerfCounterInfo* pPerfCounterInfos; ///< Array of numPerfCounters PerfCounterInfo entries.
+};
+
+/// Reports the layout of a single global perf counter sample; element type of GlobalCounterLayout::samples.
+struct GlobalSampleLayout
+{
+    GpuBlock            block;             ///< Type of GPU block.
+    uint32              instance;          ///< Which instance of that type of GPU block.
+    uint32              slot;              ///< Slot varies in meaning per block.
+    uint32              eventId;           ///< Sampled event ID.
+    PerfCounterDataType dataType;          ///< What type of data is written (e.g., 32-bit uint).
+    gpusize             beginValueOffset;  ///< Offset in bytes where the sample data begins.
+    gpusize             endValueOffset;    ///< Offset in bytes where the sample data ends.
+};
+
+/// Describes the layout of global perf counter data in memory. Output of IPerfExperiment::GetGlobalCounterLayout().
+struct GlobalCounterLayout
+{
+    uint32             sampleCount;  ///< Number of samples described in samples[].
+    GlobalSampleLayout samples[1];   ///< Describes the layout of each sample.  Declared with a single element; this
+                                     ///  structure is repeated (sampleCount - 1) additional times.
+};
+
+/// Enumeration of SQ Thread trace token types. All versions of Thread Trace (TT) are represented. If an unsupported
+/// token is enabled, no error is reported.
+enum ThreadTraceTokenTypeFlags : Pal::uint32
+{
+    Misc         = 0x00000001, ///< A miscellaneous event has been sent. TT 2.3.
+    Timestamp    = 0x00000002, ///< Timestamp tokens. TT 2.3.
+    Reg          = 0x00000004, ///< Register activity token. TT 2.3.
+    WaveStart    = 0x00000008, ///< A wavefront has started. TT 2.3.
+    WaveAlloc    = 0x00000010, ///< Output space has been allocated for vertex position or color/Z. TT 2.3.
+    RegCsPriv    = 0x00000020, ///< There has been a compute pipeline private data, state or threadgroup update. TT 2.3.
+    WaveEnd      = 0x00000040, ///< Wavefront completion. TT 2.3.
+    Event        = 0x00000080, ///< An event has reached the top of a shader stage. TT 2.3.
+    EventCs      = 0x00000100, ///< An event has reached the top of a compute shader stage. TT 2.3.
+    EventGfx1    = 0x00000200, ///< An event has reached the top of a shader stage for the second GFX pipe. TT 2.3.
+    Inst         = 0x00000400, ///< The shader has executed an instruction. TT 2.3.
+    InstPc       = 0x00000800, ///< The shader has explicitly written the PC value. TT 2.3.
+    InstUserData = 0x00001000, ///< The shader has written user data into the thread trace buffer. TT 2.3.
+    Issue        = 0x00002000, ///< Provides information about instruction scheduling. TT 2.3.
+    Perf         = 0x00004000, ///< The performance counter delta has been updated. TT 2.3 and below only.
+    RegCs        = 0x00008000, ///< A compute state update packet has been received by the SPI. TT 2.3.
+    VmemExec     = 0x00010000, ///< A previously issued VMEM instruction is now being sent to LDS/TA. TT 3.0.
+    AluExec      = 0x00020000, ///< A previously issued VALU instruction is now being executed. TT 3.0.
+    ValuInst     = 0x00040000, ///< A VALU instruction has been issued. TT 3.0.
+    WaveRdy      = 0x00080000, ///< Mask of which waves became ready this cycle but did not issue an instruction. TT 3.0.
+    Immed1       = 0x00100000, ///< One wave issued an immediate instruction this cycle. TT 3.0.
+    Immediate    = 0x00200000, ///< One or more waves have issued an immediate instruction this cycle. TT 3.0.
+    UtilCounter  = 0x00400000, ///< A new set of utilization counter values. TT 3.0.
+    RealTime     = 0x00800000, ///< Output realtime. TT 3.3.
+    All          = 0xFFFFFFFF  ///< Every bit of the mask set: enables all the above tokens.
+};
+
+/// Enumeration of register types whose reads/writes can be traced. Register reads are disabled by default as they can
+/// generate a lot of traffic and cause the GPU to hang.
+enum ThreadTraceRegTypeFlags : Pal::uint32
+{
+    EventRegs             = 0x00000001, ///< Event registers. TT 2.3.
+    DrawRegs              = 0x00000002, ///< Draw registers. TT 2.3.
+    DispatchRegs          = 0x00000004, ///< Dispatch registers. TT 2.3.
+    UserdataRegs          = 0x00000008, ///< UserData Registers. Must be explicitly requested in TT 2.3.
+    MarkerRegs            = 0x00000010, ///< Thread trace marker data regs. TT 2.3.
+    ShaderConfigRegs      = 0x00000020, ///< Shader configuration state. TT 3.0.
+    ShaderLaunchStateRegs = 0x00000040, ///< Shader program launch state. TT 3.0.
+    GraphicsPipeStateRegs = 0x00000080, ///< Graphics pipeline state. TT 3.0.
+    AsyncComputeRegs      = 0x00000100, ///< Async compute registers. TT 3.0.
+    GraphicsContextRegs   = 0x00000200, ///< Graphics context registers. TT 3.0.
+    OtherConfigRegs       = 0x00000400, ///< Other regs. TT 2.3.
+    AllRegWrites          = 0x000007FF, ///< All reg writes other than OtherBusRegs (the eleven bits above ORed).
+    OtherBusRegs          = 0x00000800, ///< All write activity over gfx and compute buses. Debug only. TT 3.0.
+    AllRegReads           = 0x00001000, ///< Not encouraged to be enabled. This can cause a GPU hang.
+    AllReadsAndWrites     = 0xFFFFFFFF  ///< All reads and writes. Not encouraged. This can cause a GPU hang.
+};
+
+/// Represents thread trace token types and register types that can be enabled to be reported in the trace data. If
+/// a particular token type or reg type is unsupported, no error is returned and the thread trace is configured with
+/// the minimum supported tokens in the user provided config.
+struct ThreadTraceTokenConfig
+{
+    /// Mask of @ref ThreadTraceTokenTypeFlags values selecting the token types to enable.
+    uint32 tokenMask;
+
+    /// Mask of @ref ThreadTraceRegTypeFlags values selecting the register types to enable.
+    uint32 regMask;
+};
+
+/// Specifies properties for a perf trace being added to a perf experiment.  Input structure to
+/// IPerfExperiment::AddThreadTrace().  Each named optionFlags bit has a same-named member in optionValues.
+struct ThreadTraceInfo
+{
+    PerfTraceType              traceType;    ///< Type of trace to add.
+    uint32                     instance;     ///< Selected trace instance.
+
+    union
+    {
+        struct
+        {
+            // Common to all traces. NOTE(review): a set bit presumably enables the same-named optionValues field.
+            uint32 bufferSize                            :  1;
+
+            // Thread trace only options
+            uint32 threadTraceTargetSh                   :  1;
+            uint32 threadTraceTargetCu                   :  1;
+            uint32 threadTraceSh0CounterMask             :  1;
+            uint32 threadTraceSh1CounterMask             :  1;
+            uint32 threadTraceSimdMask                   :  1;
+            uint32 threadTraceVmIdMask                   :  1;
+            uint32 threadTraceRandomSeed                 :  1;
+            uint32 threadTraceShaderTypeMask             :  1;
+            uint32 threadTraceIssueMask                  :  1;
+            uint32 threadTraceWrapBuffer                 :  1;
+            uint32 threadTraceStallBehavior              :  1;
+            uint32 threadTraceTokenConfig                :  1;
+            uint32 threadTraceStallAllSimds              :  1;
+            uint32 threadTraceExcludeNonDetailShaderData :  1;
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 899
+            uint32 threadTraceEnableExecPop              :  1;
+#else
+            uint32 placeholder2                          :  1; ///< Occupies the threadTraceEnableExecPop bit position.
+#endif
+            uint32 reserved                              : 16; ///< Reserved for future use.
+        };
+        uint32 u32All;                                         ///< Flags packed as 32-bit uint.
+    } optionFlags;
+
+    struct
+    {
+        // Options common to all traces
+        size_t                    bufferSize;
+
+        // Thread trace only options
+        ThreadTraceTokenConfig    threadTraceTokenConfig;    ///< Token and register types to report in the trace.
+        uint32                    threadTraceTargetSh;
+        uint32                    threadTraceTargetCu;
+        uint32                    threadTraceSh0CounterMask;
+        uint32                    threadTraceSh1CounterMask;
+        uint32                    threadTraceSimdMask;
+        uint32                    threadTraceVmIdMask;
+        uint32                    threadTraceRandomSeed;
+        PerfExperimentShaderFlags threadTraceShaderTypeMask; ///< Mask of PerfExperimentShaderFlags values.
+        uint32                    threadTraceIssueMask;
+        bool                      threadTraceWrapBuffer;
+        uint32                    threadTraceStallBehavior;
+        bool                      threadTraceStallAllSimds;
+        bool                      threadTraceExcludeNonDetailShaderData;
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 899
+        bool                      threadTraceEnableExecPop;  ///< Only declared for client interface versions >= 899.
+#endif
+    } optionValues;
+};
+
+/// Thread trace info written when the trace is stopped (copied from internal SQ registers); three 32-bit values.
+struct ThreadTraceInfoData
+{
+    uint32 curOffset;     ///< Contents of SQ_THREAD_TRACE_WPTR register.
+    uint32 traceStatus;   ///< Contents of SQ_THREAD_TRACE_STATUS register.
+    uint32 writeCounter;  ///< Contents of SQ_THREAD_TRACE_CNTR register.
+};
+
+/// Describes the layout of a single shader engine's thread trace data; element type of ThreadTraceLayout::traces.
+struct ThreadTraceSeLayout
+{
+    uint32  shaderEngine;  ///< Shader engine index.
+    uint32  computeUnit;   ///< Compute unit index.
+    gpusize infoOffset;    ///< Offset to ThreadTraceInfoData in memory.
+    gpusize infoSize;      ///< Size in bytes reserved for ThreadTraceInfoData.
+    gpusize dataOffset;    ///< Offset in bytes to the actual trace data.
+    gpusize dataSize;      ///< Amount of trace data, in bytes.
+};
+
+/// Describes how the thread trace data is laid out. Output of IPerfExperiment::GetThreadTraceLayout().
+struct ThreadTraceLayout
+{
+    uint32              traceCount;  ///< Number of entries in traces[].
+    ThreadTraceSeLayout traces[1];   ///< Declared with one element; repeated (traceCount - 1) additional times.
+};
+
+/// Describes a single SPM counter instance; element type of SpmTraceLayout::counterData.
+struct SpmCounterData
+{
+    GpuBlock gpuBlock; ///< The kind of GPU block this counter measured.
+    uint32   instance; ///< Which specific global block instance this counter measured.
+    uint32   eventId;  ///< The event that was measured by this counter.
+    uint32   offsetLo; ///< Byte offset within each sample to the lower 16-bit half of the counter data.
+    uint32   offsetHi; ///< Byte offset within each sample to the upper 16-bit half of the counter data.
+    bool     is32Bit;  ///< If the client must combine the independent 16-bit halves into a single 32-bit value.
+                       ///  If this is false, offsetLo points to the full 16-bit data value and offsetHi is ignored.
+};
+
+/// All information required to parse the counter data out of a SpmTrace results buffer.
+///
+/// Note that the hardware will continue to write samples to the SPM ring buffer even if it runs out of unused space.
+/// The hardware will simply wrap the ring's write pointer back around to the first sample's location. Each subsequent
+/// sample will overwrite the oldest sample in the ring. When the trace is finished we will have at most @ref
+/// maxNumSamples valid samples.
+///
+/// PAL doesn't zero out the ring memory so it's generally hard for the client to distinguish valid samples from random
+/// data present in unused sample locations. PAL does guarantee that the final sample location in the ring has its
+/// timestamp zeroed out before the SPM trace starts. This means this last timestamp will only be non-zero if the ring
+/// has completely filled up and the WrPtr has wrapped one or more times. The client must inspect this timestamp when
+/// parsing the sample data:
+/// 1. The last timestamp is zero. The ring did not wrap. The oldest sample is at @ref sampleOffset. The ring's write
+///    pointer tells us how many samples were written. From the write pointer onwards the ring contains invalid data.
+/// 2. The last timestamp is non-zero. The ring did wrap. The ring's write pointer points to the oldest sample,
+///    effectively a random sample offset into the ring. The full ring contains valid sample data but it's not in
+///    oldest-to-newest order, it's shifted. The client can walk the ring from the write pointer's location (wrapping
+///    as they go) to parse all @ref maxNumSamples samples out in oldest-to-newest order.
+struct SpmTraceLayout
+{
+    gpusize offset;           ///< Byte offset into the bound GPU memory where the spm trace data begins.
+                              ///  The @ref wrPtrOffset and @ref sampleOffset are relative to this value.
+    uint32  wrPtrOffset;      ///< Byte offset within SPM trace data to the HW's write pointer (WrPtr) DWORD.
+                              ///  The WrPtr's value is an offset relative to @ref sampleOffset. Don't assume this is
+                              ///  a byte offset (see @ref wrPtrGranularity). The WrPtr's value shows where the HW's
+                              ///  theoretical next sample would go. This value may wrap back to zero if the HW runs
+                              ///  out of space in the SPM ring buffer.
+    uint32  wrPtrGranularity; ///< The WrPtr's granularity. Multiply WrPtr's value by this value to get a byte offset.
+    uint32  sampleOffset;     ///< Byte offset within the SPM trace data to the array of samples. The HW will write the
+                              ///  first sample here but it will be overwritten if the ring wraps (see the top comment).
+    uint32  sampleStride;     ///< The distance between consecutive samples in bytes. May include empty padding.
+    uint32  maxNumSamples;    ///< The maximum number of samples the HW can write before wrapping. The SPM ring buffer
+                              ///  ends at sampleOffset + sampleStride * maxNumSamples.
+    uint32  numCounters;      ///< The true length of counterData. The client must allocate extra memory for the array.
+
+    SpmCounterData counterData[1]; ///< The layout and identity of the counters in the samples.
+};
+
+/// Represents the information that is stored in the DF SPM trace metadata buffer.
+struct DfSpmTraceMetadataLayout
+{
+    uint32 numRecordPairs; ///< The number of 64-byte blocks written by this trace. There are two time segments
+                           ///  per 64-byte block so we have to check the lastSpmPkt bit to see which half of
+                           ///  the last 64-byte block is the last packet.
+    uint32 padding;        ///< Padding to match what the compiler does by default.
+    uint64 beginTimestamp; ///< The DF timestamp at the start of the DF SPM trace.
+    uint64 endTimestamp;   ///< The DF timestamp at the finish of the DF SPM trace.
+};
+
+/// Specifies properties for creation of an @ref IPerfExperiment object.  Input structure to
+/// IDevice::CreatePerfExperiment().  Each named optionFlags bit has a same-named member in optionValues.
+struct PerfExperimentCreateInfo
+{
+    union
+    {
+        struct
+        {
+            uint32 cacheFlushOnCounterCollection :  1;
+            uint32 sampleInternalOperations      :  1;
+            uint32 sqShaderMask                  :  1;
+            uint32 sqWgpShaderMask               :  1;
+            uint32 reserved                      : 28; ///< Reserved for future use.
+        };
+        uint32 u32All;                                 ///< Flags packed as 32-bit uint.
+    } optionFlags;
+
+    struct
+    {
+        bool                      cacheFlushOnCounterCollection;
+        bool                      sampleInternalOperations;
+        PerfExperimentShaderFlags sqShaderMask;    ///< GpuBlock::Sq counters only look at these shader types.
+        PerfExperimentShaderFlags sqWgpShaderMask; ///< GpuBlock::SqWgp counters only look at these shader types.
+    } optionValues;
+};
+
+/**
+ ***********************************************************************************************************************
+ * @interface IPerfExperiment
+ * @brief     Set of performance profiling activities to be performed over a specific range of commands in a command
+ *            buffer.
+ *
+ * @warning The details of building a performance experiment are not very well documented here.  Please see your local
+ *          hardware performance expert for more details until this documentation can be fully fleshed out.
+ *
+ * @see IDevice::CreatePerfExperiment
+ ***********************************************************************************************************************
+ */
+class IPerfExperiment : public IGpuMemoryBindable
+{
+public:
+    /// Adds the specified performance counter to be tracked as part of this perf experiment.
+    ///
+    /// @param [in] counterInfo Specifies which counter to add: which hardware block, instance, any options, etc.
+    ///
+    /// @returns Success if the counter was successfully added to the experiment, otherwise an appropriate error code.
+    virtual Result AddCounter(
+        const PerfCounterInfo& counterInfo) = 0;
+
+    /// Queries the layout of counter results in memory for this perf experiment.
+    ///
+    /// @param [out] pLayout Layout describing the begin and end offset of each counter in the resulting GPU memory once
+    ///                      this perf experiment is executed.  Should correspond with counters added via AddCounter().
+    ///
+    /// @returns Success if the layout was successfully returned in pLayout, otherwise an appropriate error code.
+    virtual Result GetGlobalCounterLayout(
+        GlobalCounterLayout* pLayout) const = 0;
+
+    /// Adds the specified thread trace to be recorded as part of this perf experiment.
+    ///
+    /// @param [in] traceInfo Specifies what type of trace to record, which block instance to trace, and options, etc.
+    ///
+    /// @returns Success if the trace was successfully added to the experiment, otherwise an appropriate error code.
+    virtual Result AddThreadTrace(
+        const ThreadTraceInfo& traceInfo) = 0;
+
+    /// Adds the specified DfSpmTrace to be recorded as part of this perf experiment.
+    ///
+    /// @param [in] dfSpmCreateInfo Specifies the parameters of the DF SPM trace and
+    ///                             provides the list of perf counters.
+    ///
+    /// @returns Success if the DF SPM trace was successfully added to the experiment,
+    ///          otherwise an appropriate error code.
+    virtual Result AddDfSpmTrace(
+        const SpmTraceCreateInfo& dfSpmCreateInfo) = 0;
+
+    /// Adds the specified SpmTrace to be recorded as part of this perf experiment.
+    ///
+    /// @param [in] spmCreateInfo Specifies the parameters of the spm trace and provides the list of perf counters.
+    ///
+    /// @returns Success if the spm trace was successfully added to the experiment, otherwise an appropriate error code.
+    virtual Result AddSpmTrace(
+        const SpmTraceCreateInfo& spmCreateInfo) = 0;
+
+    /// Queries the layout of thread trace results in memory for this perf experiment.
+    ///
+    /// @param [out] pLayout Layout describing how the results of each thread trace will be written to GPU memory when
+    ///                      this perf experiment is executed.  Should match traces added via AddThreadTrace().
+    ///
+    /// @returns Success if the layout was successfully returned in pLayout, otherwise an appropriate error code.
+    virtual Result GetThreadTraceLayout(
+        ThreadTraceLayout* pLayout) const = 0;
+
+    /// Queries the layout of streaming counter trace results in memory for this perf experiment.
+    ///
+    /// The caller is expected to call this function twice. The first time with pLayout->numCounters = 0 which prompts
+    /// PAL to only set numCounters to the correct number of SPM counters and return. The second call with a non-zero
+    /// numCounters prompts PAL to fill out the full structure and counterData array.
+    ///
+    /// Note that @ref SpmTraceLayout contains a variable length array. The caller must allocate enough memory for
+    /// an additional "numCounters - 1" copies of @ref SpmCounterData.
+    ///
+    /// @param [out] pLayout Layout describing the layout of the streaming counter trace results in the resulting
+    ///                      GPU memory once this perf experiment is executed.
+    ///
+    /// @returns Success if the layout was successfully returned in pLayout, otherwise an appropriate error code.
+    virtual Result GetSpmTraceLayout(
+        SpmTraceLayout* pLayout) const = 0;
+
+    /// Finalizes the performance experiment preparing it for execution.
+    ///
+    /// @returns Success if the operation executed successfully, otherwise an appropriate error code.
+    virtual Result Finalize() = 0;
+
+    /// Returns the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @returns Pointer to client data (nullptr until SetClientData() has been called).
+    void* GetClientData() const
+    {
+        return m_pClientData;
+    }
+
+    /// Sets the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @param  [in]    pClientData     A pointer to arbitrary client data.
+    void SetClientData(
+        void* pClientData)
+    {
+        m_pClientData = pClientData;
+    }
+
+protected:
+    /// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
+    /// calling the proper create method.
+    IPerfExperiment() : m_pClientData(nullptr) {}
+
+    /// @internal Destructor.  Prevent use of delete operator on this interface.  Client must destroy objects by
+    /// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
+    /// object on their own.
+    virtual ~IPerfExperiment() { }
+
+private:
+    /// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
+    /// and set via SetClientData().
+    /// For non-top-layer objects, this will point to the layer above the current object.
+    void* m_pClientData;
+};
+
+} // Pal
@@ -0,0 +1,896 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palPipeline.h
+ * @brief Defines the Platform Abstraction Library (PAL) IPipeline interface and related types.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palGpuMemoryBindable.h"
+#include "palDestroyable.h"
+#include "palImage.h"
+#include "palShaderLibrary.h"
+#include "palSpan.h"
+#include <utility>
+
+namespace Util
+{
+namespace Abi
+{
+union ApiHwShaderMapping;
+enum class HardwareStage : uint32;
+} // Abi
+
+namespace HsaAbi
+{
+struct KernelArgument;
+} // HsaAbi
+} // Util
+
+namespace Pal
+{
+struct GpuMemSubAllocInfo;
+enum class PrimitiveTopology : uint8;
+
+/// Specifies a shader type (i.e., what stage of the pipeline this shader was written for).
+enum class ShaderType : uint32
+{
+    Compute = 0,
+    Task,
+    Vertex,
+    Hull,
+    Domain,
+    Geometry,
+    Mesh,
+    Pixel,
+
+    Count   ///< Number of shader types (see NumShaderTypes).
+};
+
+/// Number of shader program types supported by PAL (i.e., the value of ShaderType::Count).
+constexpr uint32 NumShaderTypes = static_cast<uint32>(ShaderType::Count);
+
+/// Maximum number of viewports.
+constexpr uint32 MaxViewports = 16;
+
+/// Maximum number of stream-output declaration entries supported by any PAL device.
+constexpr uint32 MaxStreamOutEntries = 512;
+
+/// Specifies a general primitive category without differentiating between a strip or list and without specifying
+/// whether the primitive will include adjacency info or not.
+enum class PrimitiveType : uint32
+{
+    Point    = 0x0,
+    Line     = 0x1,
+    Triangle = 0x2,
+    Rect     = 0x3,
+    Quad     = 0x4,
+    Patch    = 0x5,
+    Count           ///< Number of PrimitiveType values.
+};
+
+/// Specifies the target range of Z values after viewport transform.
+enum class DepthRange : uint32
+{
+    ZeroToOne        = 0x0,  ///< Z ranges from 0 to 1.
+    NegativeOneToOne = 0x1,  ///< Z ranges from -1 to 1.
+};
+
+/// Specifies whether the v/t texture coordinates of a point sprite map 0 to 1 from top to bottom or bottom to top.
+enum class PointOrigin : uint32
+{
+    UpperLeft = 0x0,
+    LowerLeft = 0x1,
+    Count            ///< Number of PointOrigin values.
+};
+
+/// Specifies a primitive's shade mode.
+enum class ShadeMode : uint32
+{
+    Gouraud = 0x0,      ///< Gouraud shading mode: pixel shader input is an interpolation of the vertex values.
+    Flat    = 0x1,      ///< Flat shading mode: pixel shader input comes from the provoking vertex.
+    Count               ///< Number of ShadeMode values.
+};
+
+/// Defines a logical operation applied between the color coming from the pixel shader and the current value in the
+/// target image.  Selected per pipeline via ColorTargetState::logicOp.
+enum class LogicOp : uint32
+{
+    Copy         = 0x0,
+    Clear        = 0x1,
+    And          = 0x2,
+    AndReverse   = 0x3,
+    AndInverted  = 0x4,
+    Noop         = 0x5,
+    Xor          = 0x6,
+    Or           = 0x7,
+    Nor          = 0x8,
+    Equiv        = 0x9,
+    Invert       = 0xA,
+    OrReverse    = 0xB,
+    CopyInverted = 0xC,
+    OrInverted   = 0xD,
+    Nand         = 0xE,
+    Set          = 0xF,
+};
+
+/// Shader Engine Dispatch Interleave Size
+///
+/// This determines how many Threads or Threadgroups are sent to one SE before switching to the next SE.
+/// Work is always distributed in Threadgroups though.
+///
+/// The 1D values are specified in Threads and the Threadgroups are walked in a 1D typewriter fashion.
+/// The 2D values are specified in Threadgroups and also walked in typewriter fashion (in groups of the 2D pattern).
+///
+/// Clients should check for 1D and 2D support separately in:
+///   - DeviceProperties::gfxipProperties::flags::support1dDispatchInterleave
+///   - DeviceProperties::gfxipProperties::flags::support2dDispatchInterleave
+///
+/// Default will result in "Disable" for chips which do not support 1D or 2D.
+/// Disable means that every Threadgroup is issued to the next SE.
+enum class DispatchInterleaveSize : uint32
+{
+    Default,    ///< Default behavior; results in Disable on chips which do not support 1D or 2D interleave.
+    Disable,    ///< Every Threadgroup is issued to the next SE.
+
+    _1D_64_Threads,
+    _1D_128_Threads,
+    _1D_256_Threads,
+    _1D_512_Threads,
+
+    _2D_1x1_ThreadGroups,
+    _2D_1x2_ThreadGroups,
+    _2D_1x4_ThreadGroups,
+    _2D_1x8_ThreadGroups,
+    _2D_1x16_ThreadGroups,
+
+    _2D_2x1_ThreadGroups,
+    _2D_2x2_ThreadGroups,
+    _2D_2x4_ThreadGroups,
+    _2D_2x8_ThreadGroups,
+
+    _2D_4x1_ThreadGroups,
+    _2D_4x2_ThreadGroups,
+    _2D_4x4_ThreadGroups,
+
+    _2D_8x1_ThreadGroups,
+    _2D_8x2_ThreadGroups,
+
+    _2D_16x1_ThreadGroups,
+
+    Count,      ///< Number of DispatchInterleaveSize values.
+};
+
+/// Specifies whether to override the binning setting for a pipeline. Enum value of Default follows the PBB global
+/// setting. Enable or Disable value overrides PBB global setting for the pipeline and sets binning accordingly.
+enum class BinningOverride : uint32
+{
+    Default = 0x0,  ///< Follow the PBB global setting.
+    Disable = 0x1,  ///< Override the PBB global setting and disable binning for the pipeline.
+    Enable  = 0x2,  ///< Override the PBB global setting and enable binning for the pipeline.
+    Count           ///< Number of BinningOverride values.
+};
+
+/// Pipeline override for the ldsPsGroupSize setting; the GPU behavior is controlled by LDS_GROUP_SIZE.
+enum class LdsPsGroupSizeOverride : uint32
+{
+    Default     = 0x0,
+    SingleWave  = 0x1,
+    DoubleWaves = 0x2
+};
+
+/// Tri-state enum which controls enabling or disabling a feature or behavior, or letting PAL select a sensible default.
+enum class OverrideMode : int32
+{
+    Default  = -1, ///< PAL selects the default behavior, which could be either enabled or disabled.
+    Disabled = 0,  ///< Force to disabled. Equivalent to setting it to false.
+    Enabled  = 1,  ///< Force to enabled. Equivalent to setting it to true.
+};
+
+/// Enumerates the depth clamping modes a pipeline can use.
+enum class DepthClampMode : uint32
+{
+    Viewport    = 0x0,  ///< Clamps to the viewport min/max depth bounds
+    _None       = 0x1,  ///< Disables depth clamping
+#if PAL_BUILD_SUPPORT_DEPTHCLAMPMODE_ZERO_TO_ONE
+    ZeroToOne   = 0x2,  ///< Clamps between 0.0 and 1.0.
+#endif
+
+    // Unfortunately for Linux clients, X.h includes a "#define None 0" macro.  Clients have their choice of either
+    // undefining None before including this header or using _None when dealing with PAL.
+#ifndef None
+    None = _None,       ///< Disables depth clamping (alias of _None, only declared when no None macro is defined)
+#endif
+};
+
+/// Common flags controlling creation of both compute and graphics pipelines.
+union PipelineCreateFlags
+{
+    struct
+    {
+        uint32 clientInternal        :  1; ///< Internal pipeline not created by the application.
+        uint32 reverseWorkgroupOrder :  1; ///< Indicates that any Dispatch using this pipeline should execute in
+                                           ///  reverse workgroup order. This supersedes the flag on the CommandBuffer
+                                           ///  (dispatchPingPongWalk) - always forcing reverse workgroup order! This
+                                           ///  is a best effort as not all implementations or Queues may support this.
+        uint32 reserved              : 30; ///< Reserved for future use.
+    };
+    uint32 u32All;                         ///< Flags packed as 32-bit uint.
+};
+
+/// Constant defining the maximum view instance count that is supported.
+constexpr uint32 MaxViewInstanceCount = 6;
+
+/// Specifies graphics pipeline view instancing state.
+struct ViewInstancingDescriptor
+{
+    uint32         viewInstanceCount;                           ///< The view instance count of the graphics pipeline.
+    uint32         viewId[MaxViewInstanceCount];                ///< The view instance ids.
+    uint32         renderTargetArrayIdx[MaxViewInstanceCount];  ///< The instance render target array index, can be
+                                                                ///  used in hardware accelerated stereo rendering.
+    uint16         viewportArrayIdx[MaxViewInstanceCount];      ///< The instance viewport array index, can be
+                                                                ///  used in hardware accelerated stereo rendering.
+    bool           enableMasking;                               ///< Indicates whether instance masking is enabled.
+};
+
+/// Specifies the input parameters for the MSAA coverage out feature.  MSAA coverage out is used in conjunction with a
+/// single sampled color image.  This feature exports a mask indicating which samples would have been used if the
+/// image had been multi-sampled.  The mask is exported to the specified channel of the MRT pointing to the rendered
+/// image.  That is, the MRT must be an active bound render target.  This MSAA mask data can then be post-processed.
+struct MsaaCoverageOutDescriptor
+{
+    union
+    {
+        struct
+        {
+            uint32  enable        :  1; ///< Set to true to enable render target channel output
+            uint32  numSamples    :  4; ///< Number of samples to export
+            uint32  mrt           :  3; ///< Which MRT to export to.
+            uint32  channel       :  2; ///< Which channel to export to (x = 0, y = 1, z = 2, w = 3)
+            uint32  reserved      : 22; ///< Reserved for future use.
+        };
+
+        uint32  u32All;                 ///< All flags packed as a single uint32.
+    } flags;                            ///< MSAA coverage out control flags.
+};
+
+/// Specifies properties about an indirect function belonging to a compute @ref IPipeline object.  Part of the input
+/// structure to IDevice::CreateComputePipeline().
+struct ComputePipelineIndirectFuncInfo
+{
+    const char*  pSymbolName; ///< ELF Symbol name for the associated function.  Must not be null.
+    gpusize      gpuVirtAddr; ///< [out] GPU virtual address of the function.  This is computed by PAL during
+                              ///  pipeline creation.
+};
+
+/// Specifies properties for creation of a compute @ref IPipeline object.  Input structure to
+/// IDevice::CreateComputePipeline().
+struct ComputePipelineCreateInfo
+{
+    PipelineCreateFlags flags;                 ///< Flags controlling pipeline creation.
+
+    const void*         pPipelineBinary;       ///< Pointer to Pipeline ELF binary implementing the Pipeline ABI
+                                               ///  interface. The Pipeline ELF contains pre-compiled shaders,
+                                               ///  register values, and additional metadata.
+    size_t              pipelineBinarySize;    ///< Size of Pipeline ELF binary in bytes.
+    uint32              maxFunctionCallDepth;  ///< Maximum depth for indirect function calls. Not used for a new
+                                               ///  path ray-tracing pipeline as the compiler has pre-calculated
+                                               ///  stack requirements.
+    bool disablePartialDispatchPreemption; ///< Prevents scenarios where a subset of the dispatched thread groups are
+                                           ///  preempted and the remaining thread groups run to completion. This
+                                           ///  can occur when thread group granularity preemption is available and
+                                           ///  instruction level (CWSR) is not. This setting is useful for allowing
+                                           ///  dispatches with interdependent thread groups.
+    DispatchInterleaveSize interleaveSize; ///< Controls how many thread groups are sent to one SE before switching to
+                                           ///  the next one.
+
+    /// PAL expects a fixed 3D thread group size for each compute pipeline but the HSA ABI supports dynamic group sizes.
+    /// If this pipeline's ELF binary metadata doesn't specify a fixed thread group size, this should be used to force
+    /// a particular thread group size. If this extent is set to all zeros PAL will use the metadata's group size.
+    /// This field is not supported on PAL ABI ELFs, it should be set to all zeros.
+    Extent3d threadsPerGroup;
+    TriState groupLaunchGuarantee; ///< Force the group launch guarantee mechanism on or off. This feature will throttle
+                                   ///  issuing of low priority waves when it detects too many higher priority waves are
+                                   ///  failing to schedule due to resource constraints.
+
+    const char* pKernelName; ///< When creating a pipeline from an HSA ELF binary with multiple kernels, selects the
+                             ///  kernel to build the pipeline from. Null means the ELF binary has only one kernel.
+
+};
+
+/// Specifies information about the viewport behavior of an assembled graphics pipeline.  Part of the input
+/// structure @ref GraphicsPipelineCreateInfo.
+struct ViewportInfo
+{
+    bool       depthClipNearEnable; ///< Enable clipping based on the near Z coordinate.
+    bool       depthClipFarEnable;  ///< Enable clipping based on the far Z coordinate.
+    DepthRange depthRange;          ///< Specifies Z dimensions of screen space (i.e., post viewport transform:
+                                    ///  0 to 1 or -1 to 1).
+};
+
+/// Specifies the edge rule used for rasterization.
+enum class EdgeRuleMode : uint32
+{
+    D3dCompliant    = 0x0,   ///< Use rasterization edge-rules which comply with the D3D spec.
+    OpenGlDefault   = 0x1,   ///< Use rasterization edge-rules compatible with the default OpenGL driver.
+};
+
+/// Specifies the rasterizer state used for creation of a graphics pipeline (@ref GraphicsPipelineCreateInfo).
+struct RasterizerState
+{
+    PointOrigin     pointCoordOrigin;          ///< Controls texture coordinate orientation for point sprites.
+    bool            expandLineWidth;           ///< If true, line primitives will have their width expanded by 1/cos(a)
+                                               ///  where a is the minimum angle from horizontal or vertical.
+                                               ///  This can be used in conjunction with PS patching for a client to
+                                               ///  implement line antialiasing.
+    ShadeMode       shadeMode;                 ///< Specifies shading mode, Gouraud or Flat.
+    bool            rasterizeLastLinePixel;    ///< Specifies whether to draw last pixel in a line.
+    bool            outOfOrderPrimsEnable;     ///< Enables out-of-order primitive rasterization.  PAL silently
+                                               ///  ignores this if it is unsupported in hardware.
+    bool            perpLineEndCapsEnable;     ///< Forces the use of perpendicular line end caps as opposed to
+                                               ///  axis-aligned line end caps during line rasterization.
+    BinningOverride binningOverride;           ///< Binning setting for this pipeline.
+
+    DepthClampMode  depthClampMode;            ///< Depth clamping behavior.
+
+    union
+    {
+        struct
+        {
+            uint8 clipDistMaskValid : 1; ///< Whether or not @ref clipDistMask, below, is valid.
+            uint8 cullDistMaskValid : 1; ///< Whether or not @ref cullDistMask, below, is valid.
+            uint8 reserved : 6;          ///< Reserved for future use.
+        };
+        uint8 u8All;                    ///< All the flags as a single value.
+    } flags;                            ///< Flags indicating which of the masks below are valid.
+
+    uint8         cullDistMask;           ///< Mask of which cullDistance exports to leave enabled.
+    uint8         clipDistMask;           ///< Mask of which clipDistance exports to leave enabled.
+    bool          dx10DiamondTestDisable; ///< Disable DX10 diamond test during line rasterization.
+    EdgeRuleMode  edgeRule;               ///< Rasterization edge-rule mode; see @ref EdgeRuleMode.
+};
+
+/// Specifies per-MRT color target info in the color target state (@ref ColorTargetState).
+struct ColorTargetInfo
+{
+    SwizzledFormat swizzledFormat;      ///< Color target format and channel swizzle. Set the format to invalid
+                                        ///  if no color target will be bound at this slot.
+    uint8          channelWriteMask;    ///< Color target write mask.  Bit 0 controls the red channel, bit 1 is
+                                        ///  green, bit 2 is blue, and bit 3 is alpha.
+    bool           forceAlphaToOne;     ///< Treat alpha as one regardless of the shader output.  Ignored unless
+                                        ///  supportAlphaToOne is set in DeviceProperties.
+};
+
+/// Specifies the color target state used for creation of a graphics pipeline (@ref GraphicsPipelineCreateInfo).
+struct ColorTargetState
+{
+    bool    alphaToCoverageEnable;           ///< Enable alpha to coverage.
+    bool    dualSourceBlendEnable;           ///< Blend state bound at draw time will use a dual source blend mode.
+    LogicOp logicOp;                         ///< Logic operation to perform.
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 904
+    bool    uavExportSingleDraw;             ///< When UAV export is enabled, acts as a hint that only a single draw
+                                             ///  is done on a color target with this or subsequent pipelines before
+                                             ///  a barrier. Improves performance by allowing pipelines to overlap.
+#endif
+
+    ColorTargetInfo target[MaxColorTargets]; ///< Per-MRT color target info.
+};
+
+/// Specifies properties for creation of a graphics @ref IPipeline object.  Input structure to
+/// IDevice::CreateGraphicsPipeline().
+struct GraphicsPipelineCreateInfo
+{
+    PipelineCreateFlags flags;                 ///< Flags controlling pipeline creation.
+
+    const void*         pPipelineBinary;       ///< Pointer to Pipeline ELF binary implementing the Pipeline ABI
+                                               ///  interface. The Pipeline ELF contains pre-compiled shaders,
+                                               ///  register values, and additional metadata.
+    size_t              pipelineBinarySize;    ///< Size of Pipeline ELF binary in bytes.
+    const IShaderLibrary** ppShaderLibraries;  ///< An array of graphics @ref IShaderLibrary objects. pPipelineBinary
+                                               ///  and ppShaderLibraries can't be valid at the same time.
+    size_t              numShaderLibraries;    ///< Number of graphics shader library objects in ppShaderLibraries.
+    bool                useLateAllocVsLimit;   ///< If set, use the specified lateAllocVsLimit instead of PAL internally
+                                               ///  determining the limit.
+    uint32              lateAllocVsLimit;      ///< The number of VS waves that can be in flight without having param
+                                               ///  cache and position buffer space. If useLateAllocVsLimit flag is set,
+                                               ///  PAL will use this limit instead of the PAL-specified limit.
+    bool                useLateAllocGsLimit;   ///< If set, use the specified lateAllocGsLimit instead of PAL internally
+                                               ///  determining the limit.
+    uint32              lateAllocGsLimit;      ///< Controls GS LateAlloc val (for pos/prim allocations NOT param cache)
+                                               ///  on NGG pipelines. Can be no more than 127.
+    struct
+    {
+        struct
+        {
+            PrimitiveType primitiveType;      ///< Basic primitive category: points, line, triangles, patches.
+            bool          topologyIsPolygon;  ///< Indicates that triangle primitives are combined to represent more
+                                              ///  complex polygons. Only valid for triangle primitive types.
+            uint32        patchControlPoints; ///< Number of control points per patch. Only required if primitiveType
+                                              ///  is PrimitiveType::Patch.
+        } topologyInfo; ///< Various information about the primitive topology that will be used with this pipeline.
+                        ///  All of this info must be consistent with the full topology specified by
+                        ///  ICmdBuffer::SetPrimitiveTopology() when drawing with this pipeline bound.
+
+        /// Number of vertex buffer slots which are accessed by this pipeline.  Behavior is undefined if the pipeline
+        /// tries to access a vertex buffer slot outside the range [0, vertexBufferCount).  It is generally advisable
+        /// to make this the minimum value possible because that reduces the number of vertex buffer slots PAL has to
+        /// maintain for this pipeline when recording command buffers.
+        uint32 vertexBufferCount;
+    } iaState;                   ///< Input assembler state.
+
+    RasterizerState  rsState;    ///< Rasterizer state.
+    ColorTargetState cbState;    ///< Color target state.
+
+    ViewInstancingDescriptor  viewInstancingDesc;  ///< Descriptor describes view instancing state
+                                                   ///  of the graphics pipeline
+    MsaaCoverageOutDescriptor coverageOutDesc;     ///< Descriptor describes input parameters for MSAA coverage out.
+    ViewportInfo              viewportInfo;        ///< Viewport info.
+    DispatchInterleaveSize    taskInterleaveSize;  ///< Ignored for pipelines without a task shader. For pipelines with
+                                                   ///  a task shader, controls how many thread groups are sent to one
+                                                   ///  SE before switching to the next one.
+    LdsPsGroupSizeOverride ldsPsGroupSizeOverride; ///< Whether to override ldsPsGroupSize setting for pipeline.
+
+    TriState groupLaunchGuarantee; ///< Force the group launch guarantee mechanism on or off. This feature will throttle
+                                   ///  issuing of low priority waves when it detects too many higher priority waves are
+                                   ///  failing to schedule due to resource constraints.
+    bool     noForceReZ;           ///< Disables the ability for PAL to force ReZ modes outside of what was chosen by
+                                   ///  the compiler for this pipeline.
+};
+
+/// The graphics pipeline view instancing information. This is used to determine if hardware accelerated stereo
+/// rendering can be enabled for a graphics pipeline.
+struct GraphicPipelineViewInstancingInfo
+{
+    union
+    {
+        struct
+        {
+            uint32 shaderUseViewId                  : 1;   ///< If any shader in pipeline uses view id.
+            uint32 gsExportRendertargetArrayIndex   : 1;   ///< If gs exports render target array index,
+                                                           ///  must be 0 if there is no gs.
+            uint32 gsExportViewportArrayIndex       : 1;   ///< If gs exports viewport array index,
+                                                           ///  must be 0 if there is no gs.
+            uint32 reserved                         : 29;  ///< Reserved for future use.
+        };
+        uint32 apiShaderFlags;                             ///< All of the flags above packed as a single uint32.
+    };
+
+    const ViewInstancingDescriptor* pViewInstancingDesc;    ///< View Instancing descriptor
+};
+
+/// Reports properties of a compiled pipeline.  This includes hashes for the pipeline and shaders that the client can
+/// use to correlate PAL pipeline/shader dumps with corresponding API-level pipelines/shaders.
+struct PipelineInfo
+{
+    const char* pName;                  ///< Non-owning pointer to a null-terminated string containing human-readable
+                                        ///  name of this pipeline. nullptr indicates no name was provided.
+
+    PipelineHash internalPipelineHash;  ///< 128-bit identifier extracted from this pipeline's ELF binary, composed of
+                                        ///  the state the compiler decided was appropriate to identify the compiled
+                                        ///  shaders.  The lower 64 bits are "stable"; the upper 64 bits are "unique".
+
+    struct
+    {
+        ShaderHash hash;      ///< Unique 128-bit identifier for this shader.  0 indicates there is no shader bound for
+                              ///  the corresponding shader stage.
+    } shader[NumShaderTypes]; ///< Array of per-shader pipeline properties.
+
+    union
+    {
+        struct
+        {
+            uint32 hsaAbi    : 1;  ///< This pipeline uses the HSA ABI (i.e. bind arguments not user-data)
+            uint32 usesCps   : 1;  ///< This pipeline uses continuations passing shaders (CPS). In an archive pipeline,
+                                   ///  this bit is set iff it is set in any constituent pipeline.
+            uint32 cpsGlobal : 1;  ///< If using continuations passing shaders (CPS), stack is in global rather than
+                                   ///  scratch. In an archive pipeline, this bit is set iff it is set in any
+                                   ///  constituent pipeline.
+            uint32 reserved  : 29; ///< Reserved for future use.
+        };
+        uint32 u32All;            ///< All flags combined as a single uint32.
+    } flags;                      ///< Pipeline properties.
+
+    struct
+    {
+        union
+        {
+            struct
+            {
+                uint32 perSampleShading : 1;    ///< Shader instructions want per-sample execution.
+                uint32 usesSampleMask   : 1;    ///< Shader is using sample mask.
+                uint32 enablePops       : 1;    ///< Primitive order pixel shader is enabled.
+                uint32 reserved         : 29;   ///< Reserved for future use.
+            };
+            uint32 u32All;                      ///< All flags combined as a single uint32.
+        } flags;                                ///< Pixel shader flags.
+    } ps;                                       ///< Pixel shader properties.
+
+    uint64 resourceMappingHash; ///< 64-bit hash of the resource mapping used when compiling the pipeline,
+                                ///  if available (0 otherwise).
+
+    /// The GPU address and size in bytes of the traceRays table, or { 0, 0 } if none.
+    /// The format of the table is private to the compiler.
+    /// Compiled code requires a buffer descriptor for the traceRays table to be passed in to the launch kernel
+    /// in a particular place.
+    gpusize traceRaysTable;
+    gpusize traceRaysTableSize;
+
+    /// Pointer to the traceRays table for debugging purposes, or nullptr if none.
+    const char* pTraceRaysTable;
+
+    /// Pointer to and size in bytes of the shader identifiers table, or { nullptr, 0 } if none.
+    /// The table has an entry for each export and then each hit group specified when the pipeline was compiled.
+    /// Each entry has a pointer to the shader or hit group name in the same block of data (always 64 bits even
+    /// on a 32-bit host), then the 32 byte shader identifier whose format is agreed between the compiler and
+    /// GPURT's library code, for a total of 40 bytes per entry.
+    const void* pShaderIdentifiers;
+    size_t      shaderIdentifiersSize;
+
+    uint32 unifiedRgsNameHash; ///< 32-bit hash of unified RGS name, 0 otherwise
+
+};
+
+/// Describes a 3D arrangement of threads or thread groups used by a compute shader dispatch.
+///
+/// The struct sits halfway between Extent3d and Offset3d: depending on context it may act as an offset or as an
+/// extent. Its meaning is bound to 3D thread or thread group grids rather than to generic notions such as "extent"
+/// or "offset". Whether the values refer to threads or to thread groups is likewise context specific.
+struct DispatchDims
+{
+    uint32 x; ///< X-dimension component, in threads or thread groups.
+    uint32 y; ///< Y-dimension component, in threads or thread groups.
+    uint32 z; ///< Z-dimension component, in threads or thread groups.
+
+    /// Multiplies the three dimensions together to get the volume of this 3D arrangement.
+    ///
+    /// @returns the total number of threads or thread groups this struct represents.
+    uint32 Flatten() const { return (x * y) * z; }
+};
+
+// Some code casts DispatchDims directly to an array of three uint32s, so it must have exactly that size.
+static_assert(sizeof(DispatchDims) == sizeof(uint32) * 3, "DispatchDims not castable to uint32*");
+
+/// Adds two DispatchDims together, one dimension at a time.
+///
+/// @param [in] l  The left-hand operand.
+/// @param [in] r  The right-hand operand.
+///
+/// @returns A new DispatchDims whose x, y and z are the sums of the matching dimensions of 'l' and 'r'.
+inline DispatchDims operator+(DispatchDims l, DispatchDims r) { l.x += r.x; l.y += r.y; l.z += r.z; return l; }
+
+/// Accumulates one DispatchDims into another, one dimension at a time.
+///
+/// @param [in] l  The left-hand operand, which receives the result.
+/// @param [in] r  The right-hand operand.
+///
+/// @returns A reference to 'l' once it holds the component-wise sum of 'l' and 'r'.
+inline DispatchDims& operator+=(DispatchDims& l, DispatchDims r) { l = l + r; return l; }
+
+/// Multiplies two DispatchDims together, one dimension at a time.
+///
+/// @param [in] l  The left-hand operand.
+/// @param [in] r  The right-hand operand.
+///
+/// @returns A new DispatchDims whose x, y and z are the products of the matching dimensions of 'l' and 'r'.
+inline DispatchDims operator*(DispatchDims l, DispatchDims r) { l.x *= r.x; l.y *= r.y; l.z *= r.z; return l; }
+
+/// Scales one DispatchDims by another, one dimension at a time.
+///
+/// @param [in] l  The left-hand operand, which receives the result.
+/// @param [in] r  The right-hand operand.
+///
+/// @returns A reference to 'l' once it holds the component-wise product of 'l' and 'r'.
+inline DispatchDims& operator*=(DispatchDims& l, DispatchDims r) { l = l * r; return l; }
+
+/// Bit flags representing API-level shader stages; each flag's bit index is the matching ShaderType value.
+enum ShaderStageFlagBits : uint32
+{
+    ApiShaderStageCompute  = (1u << static_cast<uint32>(ShaderType::Compute)),
+    ApiShaderStageTask     = (1u << static_cast<uint32>(ShaderType::Task)),
+    ApiShaderStageVertex   = (1u << static_cast<uint32>(ShaderType::Vertex)),
+    ApiShaderStageHull     = (1u << static_cast<uint32>(ShaderType::Hull)),
+    ApiShaderStageDomain   = (1u << static_cast<uint32>(ShaderType::Domain)),
+    ApiShaderStageGeometry = (1u << static_cast<uint32>(ShaderType::Geometry)),
+    ApiShaderStageMesh     = (1u << static_cast<uint32>(ShaderType::Mesh)),
+    ApiShaderStagePixel    = (1u << static_cast<uint32>(ShaderType::Pixel)),
+};
+
+/// Reports shader stats. Multiple bits set in the shader stage mask indicate that multiple shaders have been combined
+/// due to HW support. The same information will be repeated for both the constituent shaders in this case.
+///
+/// @see IPipeline::GetShaderStats
+struct ShaderStats
+{
+    uint32             shaderStageMask;        ///< Indicates the stages of the pipeline this shader is
+                                               ///  used for. If multiple bits are set, it implies
+                                               ///  shaders were merged. See @ref ShaderStageFlagBits.
+    CommonShaderStats  common;                 ///< The shader compilation parameters for this shader.
+    /// Maximum number of VGPRs the compiler was allowed to use for this shader.  This limit will be the minimum
+    /// of any architectural restriction and any client-requested limit intended to increase the number of waves in
+    /// flight.
+    uint32             numAvailableVgprs;
+    /// Maximum number of SGPRs the compiler was allowed to use for this shader.  This limit will be the minimum
+    /// of any architectural restriction and any client-requested limit intended to increase the number of waves in
+    /// flight.
+    uint32             numAvailableSgprs;
+    size_t             isaSizeInBytes;         ///< Size of the shader ISA disassembly for this shader.  Reported as 0
+                                               ///  unless the disassembly size was requested through
+                                               ///  IPipeline::GetShaderStats().
+    ShaderHash         palShaderHash;          ///< Internal hash of the shader compilation data used by PAL.
+
+    union
+    {
+        struct
+        {
+            uint32 writesUAV   : 1;     ///< This shader performs writes to UAVs.
+            uint32 writesDepth : 1;     ///< Indicates explicit depth writes performed by the shader stage.
+            uint32 streamOut   : 1;     ///< The shader performs stream out of shader generated data.
+            uint32 reserved    : 29;    ///< Reserved for future use.
+        };
+        uint32 u32All;                  ///< All flags combined as a single uint32.
+
+    } shaderOperations;                 ///< Flags depicting shader operations.
+
+    struct
+    {
+        DispatchDims numThreadsPerGroup; ///< Number of compute threads per thread group in X, Y, and Z dimensions.
+    } cs;                                ///< Parameters specific to compute shader only.
+
+    union
+    {
+        struct
+        {
+            uint8 copyShaderPresent : 1; ///< Indicates that the copy shader data (see copyShader) is valid.
+            uint8 reserved          : 7; ///< Reserved for future use.
+        };
+        uint8 u8All;                     ///< All the flags as a single value.
+    } flags;                             ///< Flags related to this shader data.
+
+    CommonShaderStats  copyShader;       ///< This data is valid only when the copyShaderPresent flag above is set.
+};
+
+ /**
+  ***********************************************************************************************************************
+  * @interface IPipeline
+  * @brief     Monolithic object containing all shaders and a large amount of "shader adjacent" state.  Separate concrete
+  *            implementations will support compute or graphics pipelines.
+  *
+  * @see IDevice::CreateComputePipeline()
+  * @see IDevice::CreateGraphicsPipeline()
+  * @see IDevice::LoadPipeline()
+  ***********************************************************************************************************************
+  */
+class IPipeline : public IDestroyable
+{
+public:
+    /// Returns PAL-computed properties of this pipeline and its corresponding shaders.
+    ///
+    /// @returns Property structure describing this pipeline.
+    virtual const PipelineInfo& GetInfo() const = 0;
+
+    /// Returns a list of GPU memory allocations used by this pipeline.
+    ///
+    /// @param [in,out] pNumEntries    Input value specifies the available size in pAllocInfoList; output value
+    ///                                reports the number of GPU memory allocations.
+    /// @param [out]    pAllocInfoList If pAllocInfoList=nullptr, then pNumEntries is ignored on input.  On output it
+    ///                                will reflect the number of allocations that make up this pipeline.  If
+    ///                                pAllocInfoList!=nullptr, then on input pNumEntries is assumed to be the number
+    ///                                of entries in the pAllocInfoList array.  On output, pNumEntries reflects the
+    ///                                number of entries in pAllocInfoList that are valid.
+    ///
+    /// @returns Success if the allocation info was successfully written to the buffer.
+    ///          + ErrorInvalidValue if the caller provides a buffer size that is different from the size needed.
+    ///          + ErrorInvalidPointer if pNumEntries is nullptr.
+    virtual Result QueryAllocationInfo(
+        size_t*                    pNumEntries,
+        GpuMemSubAllocInfo* const  pAllocInfoList) const = 0;
+
+    /// Gives the client access to the resource ID used for internal PAL events.
+    /// EX: Resource Create, Resource Bind, Resource Destroy.
+    ///
+    /// @returns The Resource ID.
+    virtual const void* GetResourceId() const = 0;
+
+    /// Obtains the binary code object for this pipeline.
+    ///
+    /// @param [in, out] pSize  Represents the size of the pipeline ELF (the code object).
+    ///
+    /// @param [out] pBuffer    If non-null, the pipeline ELF is written in the buffer. If null, the size required
+    ///                         for the pipeline ELF is given out in the location pSize.
+    ///
+    /// @returns Success if the pipeline binary was fetched successfully.
+    ///          + ErrorUnavailable if the pipeline binary was not fetched successfully.
+    virtual Result GetCodeObject(
+        uint32*  pSize,
+        void*    pBuffer) const = 0;
+
+    /// Obtains a pointer to the ELF-format code object that contains the given shader type. The returned ELF object
+    /// is not guaranteed to be unique per shader type, because a single code object can contain multiple shaders.
+    ///
+    /// @param [in] shaderType The shader stage for which the code object is requested.
+    /// @param [out] pSize     The size of the ELF binary.
+    ///
+    /// @returns The pointer to the ELF binary which contains the requested shader stage.
+    virtual const void* GetCodeObjectWithShaderType(
+        ShaderType shaderType,
+        size_t*    pSize) const = 0;
+
+    /// Obtains the shader pre and post compilation stats/params for the specified shader stage.
+    ///
+    /// @param [in]  shaderType The shader stage for which the stats are requested.
+    ///
+    /// @param [out] pShaderStats Pointer to the ShaderStats structure which will be filled with the shader stats for
+    ///                           the shader stage mentioned in shaderType. This cannot be nullptr.
+    /// @param [in]  getDisassemblySize If set to true performs disassembly on the shader binary code and reports the
+    ///                                 size of the disassembly string in ShaderStats::isaSizeInBytes. Else reports 0.
+    ///
+    /// @returns Success if the stats were successfully obtained for this shader, including the shader disassembly size.
+    ///          + ErrorUnavailable if a wrong shader stage for this pipeline was specified, or if some internal error
+    ///            occurred.
+    virtual Result GetShaderStats(
+        ShaderType   shaderType,
+        ShaderStats* pShaderStats,
+        bool         getDisassemblySize) const = 0;
+
+    /// Obtains the compiled shader ISA code for the shader stage specified.
+    ///
+    /// @param [in]  shaderType The shader stage for which the shader ISA code is requested.
+    ///
+    /// @param [in, out] pSize  Represents the size of the shader ISA code.
+    ///
+    /// @param [out] pBuffer    If non-null, the shader ISA code is written in the buffer. If null, the size required
+    ///                         for the shader ISA is given out in the location pSize.
+    ///
+    /// @returns Success if the shader ISA code was fetched successfully.
+    ///          + ErrorUnavailable if the shader ISA code was not fetched successfully.
+    virtual Result GetShaderCode(
+        ShaderType shaderType,
+        size_t*    pSize,
+        void*      pBuffer) const = 0;
+
+    /// Obtains the generated performance data for the hardware stage specified.
+    ///
+    /// @param [in]      hardwareStage  The hardware stage of the shader for which the performance data is requested.
+    /// @param [in, out] pSize          Represents the size of the performance data.
+    /// @param [out]     pBuffer        If non-null, the performance data is written in the buffer. If null, the size
+    ///                                 required for the performance data is given out in the location pSize.
+    ///
+    /// @returns Success if the performance data was fetched successfully.
+    ///          + ErrorUnavailable if the performance data was not fetched successfully.
+    virtual Result GetPerformanceData(
+        Util::Abi::HardwareStage hardwareStage,
+        size_t*                  pSize,
+        void*                    pBuffer) = 0;
+
+    /// Notifies PAL that this pipeline may make indirect function calls to any function contained within any of the
+    /// specified @ref IShaderLibrary objects.  This gives PAL a chance to perform any late linking steps required for
+    /// valid execution of the possible function calls (this could include adjusting hardware resources such as GPRs
+    /// or LDS space for the pipeline).
+    ///
+    /// This may be called multiple times on the same pipeline object.  Subsequent calls do not invalidate the result
+    /// of previous calls.
+    ///
+    /// This must be called prior to binding this pipeline to a command buffer which will make function calls into any
+    /// shader function contained within any of the specified libraries.  Failure to comply is an error and will result
+    /// in undefined behavior.
+    ///
+    /// Currently only supported on compute pipelines.
+    ///
+    /// @param [in] ppLibraryList  List of @ref IShaderLibrary objects to link with.
+    /// @param [in] libraryCount   Number of valid library objects in the ppLibraryList array.
+    ///
+    /// @returns Success if the operation is successful.  Other return codes may include:
+    ///          + ErrorUnavailable if called on a graphics pipeline.
+    ///          + ErrorBadPipelineData if any of the libraries in ppLibraryList are not compatible with this pipeline.
+    ///            Reasons for incompatibility include (but are not limited to) different user-data mappings, different
+    ///            wavefront sizes, and other reasons.
+    virtual Result LinkWithLibraries(
+        const IShaderLibrary*const* ppLibraryList,
+        uint32                      libraryCount) = 0;
+
+    /// Sets the stack size for indirect function calls made by this pipeline. This may be smaller than or equal to the
+    /// stack size already determined during pipeline creation or during an earlier call to LinkWithLibraries() because
+    /// the client has access to more information about which functions contained in those libraries (or in the pipeline
+    /// itself) are actually going to be called.
+    ///
+    /// Note that a future call to LinkWithLibraries() will invalidate this value, after which this should
+    /// be called again.
+    ///
+    /// @param [in] stackSizeInBytes  Client-specified stack size, in bytes.
+    virtual void SetStackSizeInBytes(
+        uint32 stackSizeInBytes) = 0;
+
+    /// Retrieves the stack sizes managed by the compiler, including the frontend stack and the backend stack.
+    ///
+    /// @param [out] pSizes  To be filled with both the frontend stack size and the backend stack size, in bytes.
+    ///
+    /// @returns Success.
+    virtual Result GetStackSizes(
+        CompilerStackSizes* pSizes) const = 0;
+
+    /// Returns the API shader type to hardware stage mapping for the pipeline.
+    ///
+    /// @returns The appropriate mapping for this pipeline.
+    virtual Util::Abi::ApiHwShaderMapping ApiHwShaderMapping() const = 0;
+
+    /// Given the zero-based position of a kernel argument, return a pointer to that argument's metadata.
+    ///
+    /// @note Only compute pipelines using the HSA ABI have kernel arguments.
+    ///
+    /// @param [in] index  The zero-based position of the kernel argument to query.
+    ///
+    /// @returns A pointer to the kernel argument's metadata, or null if this pipeline doesn't have this argument.
+    virtual const Util::HsaAbi::KernelArgument* GetKernelArgument(uint32 index) const = 0;
+
+    /// Returns the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @returns Pointer to client data (nullptr until SetClientData() has been called, per the constructor).
+    void* GetClientData() const { return m_pClientData; }
+
+    /// Sets the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @param  [in]    pClientData     A pointer to arbitrary client data.
+    void SetClientData(
+        void* pClientData)
+    {
+        m_pClientData = pClientData;
+    }
+
+    /// Get the array of underlying pipelines that this pipeline contains. For a normal non-multi-pipeline,
+    /// this returns a single-entry array pointing to the same IPipeline. For a multi-pipeline compiled in
+    /// dynamic launch mode, this returns an empty array. The contents of the returned array remain valid
+    /// until the IPipeline is destroyed.
+    ///
+    /// @returns The array of underlying pipelines.
+    virtual Util::Span<const IPipeline* const> GetPipelines() const = 0;
+
+    /// Get the array of underlying shader libraries that this pipeline contains. For a normal non-multi-pipeline,
+    /// this returns the empty array. The contents of the returned array remain valid until the IPipeline is
+    /// destroyed.
+    ///
+    /// Unlike the other virtual queries on this interface, this one is not pure: the default implementation
+    /// returns an empty span, so only implementations that actually contain libraries need to override it.
+    ///
+    /// @returns The array of underlying shader libraries.
+    virtual Util::Span<const IShaderLibrary* const> GetLibraries() const { return {}; }
+
+protected:
+    /// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
+    /// calling the proper create method.  The client data pointer starts out as nullptr.
+    IPipeline() : m_pClientData(nullptr) {}
+
+    /// @internal Destructor.  Prevent use of delete operator on this interface.  Client must destroy objects by
+    /// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
+    /// object on their own.
+    virtual ~IPipeline() { }
+
+private:
+    /// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
+    /// and set via SetClientData().
+    /// For non-top-layer objects, this will point to the layer above the current object.
+    void* m_pClientData;
+
+    // Copy construction and copy assignment are explicitly disallowed for this interface.
+    IPipeline(const IPipeline&) = delete;
+    IPipeline& operator=(const IPipeline&) = delete;
+};
+
+} // Pal
@@ -0,0 +1,645 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palPlatform.h
+ * @brief Defines the Platform Abstraction Library (PAL) IPlatform interface and related types.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palSysMemory.h"
+#include "palMemTrackerImpl.h"
+#include "palDestroyable.h"
+#include "palDeveloperHooks.h"
+
+// DevDriver forward declarations.  Only the names are needed by this header, so the DevDriver headers themselves
+// are not included here.
+namespace DevDriver
+{
+class DevDriverServer;
+namespace EventProtocol
+{
+class EventServer;
+} // EventProtocol
+class SettingsRpcService;
+} // DevDriver
+
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 890
+// Global-namespace SettingsRpcService forward declaration; only compiled for client interface versions older
+// than 890.
+namespace SettingsRpcService
+{
+class SettingsService;
+} // SettingsRpcService
+#endif
+
+#if PAL_BUILD_RDF
+// GpuUtil forward declarations (only compiled when PAL_BUILD_RDF is enabled).
+namespace GpuUtil
+{
+class TraceSession;
+} // GpuUtil
+#endif
+
+#if PAL_ENABLE_LOGGING
+// Util forward declarations (only compiled when PAL_ENABLE_LOGGING is enabled).
+namespace Util
+{
+struct DbgLoggerFileSettings;
+} // Util
+#endif
+
+namespace Pal
+{
+
+// Forward declarations of PAL types that this header only refers to by name.
+class  IDevice;
+class  IScreen;
+struct PalPlatformSettings;
+enum class PalEvent : uint32;
+
+/// Maximum number of Devices possibly attached to a system.
+constexpr uint32 MaxDevices = 16;
+/// Maximum number of Screens possibly attached to a Device.
+constexpr uint32 MaxScreensPerDevice = 6;
+/// Maximum number of Screens possibly attached to a system (MaxScreensPerDevice * MaxDevices, i.e. 96).
+constexpr uint32 MaxScreens = (MaxScreensPerDevice * MaxDevices);
+/// NOTE(review): undocumented in the original; presumably the maximum number of display modes reported for a
+/// single Screen - confirm against the users of this constant.
+constexpr uint32 MaxModePerScreen = 64;
+
+/// 32-bit PAL version identifier, stored as two 16-bit fields.
+///
+/// Version number of the PAL library.  Major version is bumped on every promotion from stg/pal to stg/pal_prm.  Minor
+/// version is bumped when a change is cherry-picked to stg/pal_prm.
+///
+/// @see PlatformProperties::palVersion
+struct Version
+{
+    uint16 major;  ///< Major version number; bumped on every promotion from stg/pal to stg/pal_prm.
+    uint16 minor;  ///< Minor version number; bumped when a change is cherry-picked to stg/pal_prm.
+};
+
+/// Union defining the DevDriver GPU ID layout: four 8-bit fields that can also be accessed as one 32-bit value.
+/// This is specifically used by DevDriver across multiple tools/drivers and should not be changed.
+union PciId
+{
+    struct
+    {
+        uint32 functionId : 8; ///< PCI function number in the system for this GPU.
+        uint32 deviceId   : 8; ///< PCI device number in the system for this GPU.
+        uint32 busId      : 8; ///< PCI bus number in the system for this GPU.
+        uint32 reserved   : 8; ///< Reserved for future use.
+    };
+    uint32 u32All;             ///< Fields packed as 32-bit uint.
+};
+
+/// Reports capabilities and general properties of this instantiation of the PAL library.
+///
+/// This covers any property that is platform-wide as opposed to being tied to a particular device in the system.
+///
+/// @see IPlatform::GetProperties
+struct PlatformProperties
+{
+    Version palVersion;  ///< Version number of the PAL library.  Note that this is distinct from the interface version.
+                         ///  It will be regularly updated as described in @ref Version.
+    union
+    {
+        struct
+        {
+            uint32 supportNonSwapChainPresents :  1; ///< If set, non-swapchain presents are supported.
+            uint32 supportBlockIfFlipping      :  1; ///< If set, IQueue::Submit can protect against command buffers
+                                                     ///  that write to GPU memory queued for a flip present.
+            uint32 explicitPresentModes        :  1; ///< If set, the PresentMode enums specified during direct and swap
+                                                     ///  chain presents explicitly determine the presentation method.
+                                                     ///  Additionally, the client must enumerate IScreens and use them
+                                                     ///  to explicitly manage fullscreen ownership. Otherwise, the
+                                                     ///  present modes are suggestions and fullscreen ownership is
+                                                     ///  managed internally by PAL.
+            uint32 reserved                    : 29; ///< Reserved for future use.
+        };
+        uint32 u32All;                               ///< Flags packed as 32-bit uint.
+    };
+};
+
+/// The client that PAL may query an application profile for.  The order is the same as SHARED_AP_AREA in the KMD
+/// escape interface, so enumerators must not be reordered.
+///
+/// @see IPlatform::QueryRawApplicationProfile
+enum class ApplicationProfileClient : uint32
+{
+    Uninitialized = 0,
+    Dxx,
+    Udx,
+    Cfx,
+    Ogl,
+    User3D,
+    Ocl,
+    Mmd,
+    Pplib,
+    Dal,
+    Chill,
+    Spp,
+    Boost,
+    DeLag,
+    Ris,
+    TurboSync,
+    DutyCycleScaling,
+    ProBoost,
+    RisWindowed,
+    FreeMux,
+    FsrOvr,
+    Count             ///< Number of enumerators listed above (including Uninitialized).
+};
+
+/// Describes a primary surface view.
+///
+/// @see IPlatform::GetPrimaryLayout()
+struct PrimaryViewInfo
+{
+    Rect    rect;                       ///< Rectangle defining one portion of a primary surface layout.
+    uint32  numIndices;                 ///< The size of the gpuIndex array.  NOTE(review): the array always has
+                                        ///  MaxDevices slots, so this is presumably the number of valid entries.
+    uint32  gpuIndex[MaxDevices];       ///< The devices in a linked adapter chain that can use this view.
+};
+
+/// Specifies output arguments for IPlatform::GetPrimaryLayout(), returning information about the layout of the primary
+/// surface.
+///
+/// @see IPlatform::GetPrimaryLayout()
+struct GetPrimaryLayoutOutput
+{
+    uint32                numViews;         ///< The number of views in the pViewInfoList array.
+    PrimaryViewInfo*      pViewInfoList;    ///< The primary surface is composed of these views.
+    union
+    {
+        struct
+        {
+            uint32 disablePartialCopy : 1;  ///< If this flag is not set, the client can transfer the specific views of
+                                            ///  primary surface to peer GPUs. Otherwise, the client must transfer the
+                                            ///  whole primary surface to peer GPUs.
+            uint32 reserved           : 31; ///< Reserved for future use.
+        };
+        uint32 u32All;  ///< Flags packed as 32-bit uint.
+    } flags;            ///< Specifies primary surface layout flags.
+};
+
+/// Specifies the TurboSync control mode.
+///
+/// @see TurboSyncControlInput
+enum class TurboSyncControlMode : uint32
+{
+    Disable           = 0,      ///< Disable TurboSync.
+    Enable            = 1,      ///< Enable TurboSync.
+    UpdateAllocations = 2,      ///< Update allocations only, without disabling or enabling TurboSync.
+    Register          = 3,      ///< Register the current platform as TurboSync requested platform, doesn't actually
+                                ///  activate TurboSync.
+    Count                       ///< Number of control modes listed above.
+};
+
+constexpr uint32 TurboSyncMaxSurfaces = 2; ///< Specifies maximum number of surfaces in a private TurboSync swapchain
+
+/// Input argument for IPlatform::TurboSyncControl. TurboSync is a feature that enables an app to render at higher than
+/// V-Sync frame rates while still being tearing-free. It creates a private swapchain and copies the application's back
+/// buffer to the primary in this private swapchain when the application is flipping. KMD controls the flipping of the
+/// private swapchain to screen.
+struct TurboSyncControlInput
+{
+    TurboSyncControlMode mode;          ///< Specifies the TurboSync control mode.
+    uint32               vidPnSourceId; ///< The vidPnSourceId the call is targeted at.
+
+    /// GpuMemory of the primaries in private swapchain, per-gpu. This is indexed by the device indices enumerated by
+    /// the platform, then by surface (up to TurboSyncMaxSurfaces per device). PAL forwards the allocation handles
+    /// (if the IGpuMemory pointer is not null) to KMD without validation.
+    const IGpuMemory*    pPrimaryMemoryArray[MaxDevices][TurboSyncMaxSurfaces];
+};
+
+/**
+************************************************************************************************************************
+* @interface IPlatform
+* @brief     Interface representing a client-configurable context of the PAL platform.
+*
+* This is the root of all client interaction with PAL. Each IPlatform contains a set of the IDevice's and IScreens
+* found in the system.
+*
+* + Creation of IDevice and IScreen objects.
+* + Installation of memory management callbacks.
+* + Query application profiles from the system.
+************************************************************************************************************************
+*/
+class IPlatform : public IDestroyable
+{
+public:
+    /// Enumerates a list of available Devices.
+    ///
+    /// This function creates a set of @ref IDevice objects corresponding to the devices attached to the system.
+    /// CreatePlatform() must be called before this function is called.
+    ///
+    /// This function may be called multiple times during the lifetime of the PAL lib, in which case all previous
+    /// @ref IDevice and @ref IScreen objects are automatically destroyed.  The client is responsible for
+    /// destroying all objects attached to the existing @ref IDevice objects before re-calling this function.
+    /// Re-enumerating Devices is required if ErrorDeviceLost is ever returned by PAL, as this may indicate a device
+    /// has been physically removed from the system.
+    ///
+    /// @note Before IPlatform::Destroy can be called, all devices returned by IPlatform::EnumerateDevices() must be
+    ///       destroyed.
+    ///
+    /// @param [out] pDeviceCount Specifies the number of devices available in the system.  This is the number of valid
+    ///                           entries in pDevices[].  Must not be null.
+    /// @param [out] pDevices     Array to be populated with a device object pointer for each device available in the
+    ///                           system. The first *pDeviceCount entries are valid.  Must not be null.  The parameter
+    ///                           is declared with MaxDevices entries.
+    ///
+    /// @returns Success if all Devices were successfully enumerated in pDevices[].  Otherwise, one of the following
+    ///          error codes may be returned:
+    ///          + ErrorInitializationFailed will be returned if PAL is unable to query the available Devices.
+    virtual Result EnumerateDevices(
+        uint32*    pDeviceCount,
+        IDevice*   pDevices[MaxDevices]) = 0;
+
+    /// Returns the storage size of the object implementing IScreen.
+    ///
+    /// Use this to determine the size of each pStorage pointer passed to GetScreens().
+    ///
+    /// @returns The storage size, in bytes, of the object implementing IScreen.
+    virtual size_t GetScreenObjectSize() const = 0;
+
+    /// Retrieves the list of available screens.
+    ///
+    /// This function queries a set of @ref IScreen objects corresponding to the screens attached to the system.
+    /// CreatePlatform() and IPlatform::EnumerateDevices() must be called before this function is called.
+    ///
+    /// This function may be called multiple times during the lifetime of the PAL lib. Each call returns a new
+    /// set of screen objects.
+    ///
+    /// @ingroup LibInit
+    ///
+    /// @param [out] pScreenCount Specifies the number of screens available in the system.  This is the number of valid
+    ///                           entries in pScreens[] and pStorage[].  Must not be null.
+    /// @param [in]  pStorage     Array of caller-allocated storage for the screen objects. Each must be the size
+    ///                           returned by GetScreenObjectSize(). The caller must always pre-allocate MaxScreens
+    ///                           worth; the array must not be null, nor may any entry be null.
+    /// @param [out] pScreens     Array to be populated with a screen pointer for each screen available in the system.
+    ///                           The first *pScreenCount entries are valid.  Must not be null.
+    ///
+    /// @note pScreens[i] uses the storage from pStorage[i]. pStorage[i] is unused for i >= *pScreenCount.
+    ///
+    /// @returns Success if all screens were successfully retrieved in pScreens[].  Otherwise, one of the following
+    ///          error codes may be returned:
+    ///          + ErrorUnavailable if this was called prior to IPlatform::EnumerateDevices().
+    virtual Result GetScreens(
+        uint32*  pScreenCount,
+        void*    pStorage[MaxScreens],
+        IScreen* pScreens[MaxScreens]) = 0;
+
+    /// Queries a client-specified application profile in raw format.
+    ///
+    /// This function queries the kernel-mode driver to determine if there is a platform-wide profile for a specific
+    /// application that the client would like to honor. It is optional, and doesn't need to be called if the client
+    /// does not wish to support application profiles.
+    ///
+    /// As the format of the profile is client specified, the profile will be returned in raw format and the client
+    /// has the responsibility to parse the profile. GpuUtil::ProfileIterator provides a basic capability to iterate
+    /// all properties in the raw data packet. The memory storing the raw data is managed by PAL.
+    ///
+    /// The pFilename string can be the EXE name, like "doom.exe", or the "Content Distribution Network" (CDN) ID,
+    /// like "SteamAppId:570".  You can use the function GpuUtil::QueryAppContentDistributionId() to get the CDN ID.
+    ///
+    /// @ingroup LibInit
+    ///
+    /// @param [in]  pFilename Filename of the application or the Steam/EA/UPlay game ID to query for its profile.
+    ///                        See GpuUtil::QueryAppContentDistributionId().
+    /// @param [in]  pPathname Optional. Allows the caller to specify a pathname in addition to a filename if they wish.
+    /// @param [in]  client    Client name that KMD will query the profile for.
+    /// @param [out] pOut      Will be filled with the application profile string if the profile exists and was
+    ///                        successfully queried.
+    ///
+    /// @returns Success if the application profile exists for the specified string(s) and the profile was successfully
+    ///          retrieved, or Unsupported if the profile does not exist and the query was successfully performed.
+    ///          Otherwise, one of the following error codes may be returned:
+    ///          + ErrorInvalidPointer will be returned if pFilename or pOut is null.
+    ///          + ErrorUnavailable if this is called before IPlatform::EnumerateDevices(), or if there were no Devices
+    ///            discovered.
+    virtual Result QueryRawApplicationProfile(
+        const wchar_t*           pFilename,
+        const wchar_t*           pPathname,
+        ApplicationProfileClient client,
+        const char**             pOut) = 0;
+
+    /// Enable UMD side support for the SPP feature (Shader Profiling for Power).  The gist of the initial version
+    /// of this feature is that we will profile important applications to determine which shaders are heavily memory
+    /// bound, then use these profiles to program the RLC to dynamically reduce engine clocks when running such shaders.
+    /// This should result in power savings with a limited perf impact.
+    ///
+    /// Each of these app profiles will include one or more tables specifying how to program the RLC.
+    /// There may be multiple tables in cases where we need different RLC programming based on user controlled factors
+    /// that affect memory boundedness: resolution, MSAA rate, etc.
+    ///
+    /// @ingroup LibInit
+    ///
+    /// @param [in]  pFilename Filename of the application or the Steam/EA/UPlay game ID to query for its profile.
+    ///                        See GpuUtil::QueryAppContentDistributionId().
+    /// @param [in]  pPathname Optional. Allows the caller to specify a pathname in addition to a filename if they wish.
+    ///
+    /// @returns Success if the application profile exists for the specified string(s) and the profile was successfully
+    ///          retrieved, or Unsupported if the profile does not exist and the query was successfully performed.
+    ///          Otherwise, one of the following error codes may be returned:
+    ///          + ErrorInvalidPointer will be returned if pFilename is null.
+    ///          + ErrorUnavailable if this is called before IPlatform::EnumerateDevices(), or if there were no Devices
+    ///            discovered.
+    virtual Result EnableSppProfile(
+        const wchar_t* pFilename,
+        const wchar_t* pPathname) = 0;
+
+    /// Reports the properties of the platform.
+    ///
+    /// Returns the capabilities and general properties of this platform instantiation.
+    ///
+    /// @param [out] pProperties Capabilities and general properties of this platform instantiation (not tied to a
+    ///                          particular device).
+    ///
+    /// @returns Success if the properties were successfully queried and returned in pProperties.  Otherwise, one of the
+    ///          following errors may be returned:
+    ///          + ErrorInvalidPointer if pProperties is null.
+    virtual Result GetProperties(
+        PlatformProperties* pProperties) = 0;
+
+    /// Registers a developer callback with the given platform.
+    ///
+    /// This static helper simply forwards to the platform's own (protected, virtual) InstallDeveloperCb() member.
+    ///
+    /// @param [in] pPlatform        The platform to install the callback into.  It is dereferenced unconditionally, so
+    ///                              it must not be null.
+    /// @param [in] pfnDeveloperCb   The developer callback function pointer to be executed by the pPlatform.
+    /// @param [in] pPrivateData     Private data that is installed with the callback for use by the installer.
+    static void InstallDeveloperCb(
+        IPlatform*          pPlatform,
+        Developer::Callback pfnDeveloperCb,
+        void*               pPrivateData)
+    {
+        pPlatform->InstallDeveloperCb(pfnDeveloperCb, pPrivateData);
+    }
+
+    /// Returns the currently enabled developer callback types.
+    ///
+    /// @returns  The bitmask of currently enabled developer callback types
+    virtual uint32 GetEnabledCallbackTypes() const = 0;
+
+    /// Sets the currently enabled developer callback types.
+    ///
+    /// @param  [in]    enabledCallbackTypesMask     A bitmask of client requested developer callback types to enable
+    virtual void SetEnabledCallbackTypes(
+        uint32 enabledCallbackTypesMask) = 0;
+
+    /// Returns a pointer to the developer driver server object if developer mode is enabled on the system.
+    ///
+    /// @returns A valid DevDriver::DevDriverServer pointer if developer mode is enabled. If developer mode is not
+    ///          enabled, nullptr will be returned.
+    virtual DevDriver::DevDriverServer* GetDevDriverServer() = 0;
+
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 890
+    /// Will be replaced by GetSettingsRpcService().
+    virtual SettingsRpcService::SettingsService* GetSettingsService() = 0;
+#endif
+
+    /// Client drivers can register their DevDriver based settings components via SettingsRpcService.
+    ///
+    /// @returns A pointer to a SettingsRpcService object. Could be nullptr if developer driver mode is not enabled.
+    virtual DevDriver::SettingsRpcService* GetSettingsRpcService() = 0;
+
+    /// Returns a pointer to the event server object. The event server will soon move out of the DevDriver
+    /// server. Hence the need to provide a separate interface to access the event server.
+    ///
+    /// @returns A valid EventServer pointer or nullptr if not valid.
+    virtual DevDriver::EventProtocol::EventServer* GetEventServer() = 0;
+
+#if PAL_BUILD_RDF
+    /// Returns a pointer to the current trace session if one was created during startup
+    ///
+    /// @returns A valid TraceSession pointer if a session currently exists. If a trace session was not created during
+    ///          startup, nullptr will be returned.
+    virtual GpuUtil::TraceSession* GetTraceSession() = 0;
+
+    /// Indicate frame count increment to frame trace controller.
+    /// Client driver is responsible for calling this method once per frame.
+    ///
+    /// @param [in] pQueue The queue on which a new frame has been detected
+    virtual void UpdateFrameTraceController(
+        IQueue *pQueue) = 0;
+#endif
+
+    /// Gets the GPU ID for a given pal device index.
+    ///
+    /// The GPU ID is determined from (BusID << 16) | (DeviceID << 8) | FunctionID
+    ///
+    /// @param [in] gpuIndex The index of the GPU
+    ///
+    /// @returns A GPU ID described above or UnknownGPUID if the device doesn't exist
+    virtual PciId GetPciId(uint32 gpuIndex) = 0;
+
+    /// Indicates whether tracing has been enabled.
+    ///
+    /// @returns True if tracing is enabled, false otherwise.
+    virtual bool IsTracingEnabled() const = 0;
+
+    /// Indicates whether the driver has been signaled to enable crash analysis mode.
+    ///
+    /// @returns True if crash analysis is enabled, false otherwise.
+    virtual bool IsCrashAnalysisModeEnabled() const = 0;
+
+    /// Indicates whether the driver has been signaled to enable Raytracing Shader Data Tokens.
+    ///
+    /// @returns True if Raytracing Shader Data Tokens is enabled, false otherwise.
+    virtual bool IsRaytracingShaderDataTokenRequested() const = 0;
+
+    /// Returns a reference to the Platform settings structure.
+    ///
+    /// @returns A const reference to a PalPlatformSettings structure.
+    virtual const PalPlatformSettings& PlatformSettings() const = 0;
+
+    /// Get primary surface layout based upon VidPnSource provided by client.
+    ///
+    /// This function is used by the client to query the layout of the primary surface. The layout describes how the
+    /// primary surface is composed with a set of views. Each view provides the rectangle of the surface area and the
+    /// GPUs this surface area will be displayed on.
+    /// The client should first call this function with pPrimaryLayoutOutput->pViewInfoList set to null to query the
+    /// number of views this primary surface has.
+    /// The client then allocates the buffer for pViewInfoList based on pPrimaryLayoutOutput->numViews and calls this
+    /// function again to query the actual view information.
+    ///
+    /// @param [in]      vidPnSourceId          VidPnSource ID that's associated to a primary surface.
+    /// @param [in, out] pPrimaryLayoutOutput   Primary surface layout output arguments.
+    ///
+    /// @returns Success if the display layout on given vidPnSourceId was successfully queried.
+    ///          Otherwise, one of the following errors may be returned:
+    ///          + ErrorInvalidValue if pPrimaryLayoutOutput is invalid.
+    ///          + ErrorUnavailable if no implementation on current platform.
+    ///          + ErrorOutOfMemory if there is not enough system memory.
+    virtual Result GetPrimaryLayout(
+        uint32                  vidPnSourceId,
+        GetPrimaryLayoutOutput* pPrimaryLayoutOutput) = 0;
+
+    /// Calls TurboSyncControl escape to control TurboSync on specific vidPnSourceId.
+    ///
+    /// The function is called when clients intend to toggle TurboSync on a vidPnSourceId. The client should allocate
+    /// private swapchain primary surfaces that are compatible with the application swapchain primaries. When used to
+    /// activate TurboSync, the private primaries' handles need to be passed in the TurboSyncControlInput data.
+    ///
+    /// @param [in] turboSyncControlInput  TurboSyncControl input arguments. See TurboSyncControlInput.
+    ///
+    /// @returns Success if the TurboSyncControl request is handled successfully.
+    virtual Result TurboSyncControl(
+        const TurboSyncControlInput& turboSyncControlInput) = 0;
+
+    /// Retrieves the opaque client data pointer currently associated with this object.
+    /// Clients can use it to attach arbitrary data to a particular PAL object.
+    ///
+    /// @returns The pointer most recently stored via SetClientData(), or nullptr if none has been set.
+    void* GetClientData() const { return m_pClientData; }
+
+    /// Associates an opaque client data pointer with this object, replacing any previously stored value.
+    /// Clients can use it to attach arbitrary data to a particular PAL object.
+    ///
+    /// @param  [in]    pClientData     A pointer to arbitrary client data.
+    void SetClientData(void* pClientData) { m_pClientData = pClientData; }
+
+    /// Allocates memory on behalf of the platform.
+    ///
+    /// The request is routed through the memory leak tracker when PAL_MEMTRACK is enabled, and is handed directly to
+    /// the platform's ForwardAllocator otherwise.
+    ///
+    /// @param [in] allocInfo @see Util::AllocInfo
+    ///
+    /// @returns Pointer to the allocated memory on success, nullptr on failure.
+    void* Alloc(const Util::AllocInfo& allocInfo)
+    {
+#if PAL_MEMTRACK
+        void* const pMemory = m_memTracker.Alloc(allocInfo);
+#else
+        void* const pMemory = m_allocator.Alloc(allocInfo);
+#endif
+        return pMemory;
+    }
+
+    /// Releases memory previously obtained from Alloc().
+    ///
+    /// Mirrors Alloc(): the request goes through the memory leak tracker when PAL_MEMTRACK is enabled, and directly
+    /// to the platform's ForwardAllocator otherwise.
+    ///
+    /// @param [in] freeInfo @see Util::FreeInfo
+    void Free(
+        const Util::FreeInfo& freeInfo)
+    {
+#if PAL_MEMTRACK
+        m_memTracker.Free(freeInfo);
+#else
+        m_allocator.Free(freeInfo);
+#endif
+    }
+
+    /// Logs a text string via the developer driver bus if it is currently connected.
+    ///
+    /// @param [in] level        Log priority level associated with the message.
+    /// @param [in] categoryMask Log category mask that represents what category fields the message relates to.
+    /// @param [in] pFormat      Format string for the log message.
+    /// @param [in] args         Variable arguments that correspond to the format string.
+    virtual void LogMessage(LogLevel        level,
+                            LogCategoryMask categoryMask,
+                            const char*     pFormat,
+                            va_list         args) = 0;
+
+    /// Logs a text string via the developer driver bus if it is currently connected.
+    ///
+    /// Convenience overload: packages the trailing variable arguments into a va_list and forwards to the va_list
+    /// overload of LogMessage().
+    ///
+    /// @param [in] level        Log priority level associated with the message.
+    /// @param [in] categoryMask Log category mask that represents what category fields the message relates to.
+    /// @param [in] pFormat      Format string for the log message.
+    /// @param [in] ...          Variable arguments that correspond to the format string.
+    void LogMessage(
+        LogLevel        level,
+        LogCategoryMask categoryMask,
+        const char*     pFormat,
+        ...)
+    {
+        va_list argList;
+        va_start(argList, pFormat);
+        LogMessage(level, categoryMask, pFormat, argList);
+        va_end(argList);
+    }
+
+    /// Logs an event using the DevDriver protocol.
+    ///
+    /// The default implementation provided here does nothing; it is virtual so that subclasses can override it.
+    ///
+    /// @param [in] eventId       The type of event you want to log.
+    /// @param [in] pEventData    A pointer to the struct corresponding to the event id.
+    /// @param [in] eventDataSize The size of the event data struct.
+    virtual void LogEvent(
+        PalEvent    eventId,
+        const void* pEventData,
+        uint32      eventDataSize)
+    {
+    }
+
+#if PAL_ENABLE_LOGGING
+    /// Function to access the current settings of file logger.
+    /// Clients can call this function to get file logger settings in order to configure
+    /// this logger at the time of its creation.
+    ///
+    /// @param [out] pSettings  A struct into which the file logger settings are copied.
+    virtual void GetDbgLoggerFileSettings(
+        Util::DbgLoggerFileSettings* pSettings) = 0;
+#endif
+
+protected:
+    /// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
+    /// calling the proper create method.
+    ///
+    /// @param [in] allocCb Allocation callbacks used to construct the platform's ForwardAllocator (m_allocator).
+    IPlatform(
+        const Util::AllocCallbacks& allocCb)
+        :
+#if PAL_MEMTRACK
+        // m_memTracker is declared before m_allocator, so it is constructed first and is handed the address of an
+        // allocator that has not been constructed yet.  NOTE(review): this is only safe if MemTracker's constructor
+        // merely stores the pointer without using it - confirm.
+        m_memTracker(&m_allocator),
+#endif
+        m_allocator(allocCb),
+        m_pClientData(nullptr) { }
+
+    /// @internal Destructor. Prevent use of delete operator on this interface.  Client must destroy objects by
+    /// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
+    /// object on their own.
+    virtual ~IPlatform() { }
+
+    /// @internal Initialization shared by every platform; subclass overrides of this function must call it.
+    /// At present the only work done here is initializing the memory leak tracker (when PAL_MEMTRACK is enabled).
+    ///
+    /// @returns The result of initializing the memory leak tracker when PAL_MEMTRACK is enabled, Success otherwise.
+    virtual Result Init()
+    {
+        Result result = Result::Success;
+#if PAL_MEMTRACK
+        result = m_memTracker.Init();
+#endif
+        return result;
+    }
+
+    /// Used by the InstallDeveloperCb to install the event handler according to the derived platform.
+    ///
+    /// @param [in] pfnDeveloperCb   The developer callback function pointer to be executed by the pPlatform.
+    /// @param [in] pPrivateData     Private data that is installed with the event handler for use by the installer.
+    virtual void InstallDeveloperCb(
+        Developer::Callback pfnDeveloperCb,
+        void*               pPrivateData) = 0;
+
+#if PAL_MEMTRACK
+    /// @internal Memory leak tracker. Requires an allocator in order to perform the actual allocations. We can't
+    /// provide this platform because that would result in a stack overflow. We must give it our forward allocator.
+    Util::MemTracker<Util::ForwardAllocator> m_memTracker;
+#endif
+
+    /// @internal Memory allocator. Calls to Alloc() and Free() are chained down to the allocator's counterparts.
+    Util::ForwardAllocator m_allocator;
+
+private:
+    /// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
+    /// and set via SetClientData().
+    /// For non-top-layer objects, this will point to the layer above the current object.
+    void* m_pClientData;
+};
+
+} // Pal
@@ -0,0 +1,234 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palQueryPool.h
+ * @brief Defines the Platform Abstraction Library (PAL) IQueryPool interface and related types.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palGpuMemoryBindable.h"
+
+namespace Pal
+{
+
+/// Enumerates the categories of GPU query pool.
+enum class QueryPoolType : uint32
+{
+    /// Occlusion query pool. Supports queries based on the Z test.
+    Occlusion      = 0,
+
+    /// Pipeline stats query pool. Supports queries based on statistics from the GPU's execution such as a count of
+    /// prims generated, shader invocations, etc.
+    PipelineStats  = 1,
+
+    /// Streamout query pool. Supports queries based on statistics from the GPU's execution such as number of
+    /// primitives written to SO buffer and storage needed.
+    StreamoutStats = 2,
+
+    /// One past the last pool type above.
+    Count,
+};
+
+/// Identifies the data a query slot must produce. Some query pool types support multiple query types.
+enum class QueryType : uint32
+{
+    /// The total passes recorded by the Z test.
+    Occlusion       = 0,
+
+    /// One if there were one or more Z test passes, zero otherwise.
+    BinaryOcclusion = 1,
+
+    /// The total statistics selected by the given pipeline stats query pool.
+    PipelineStats   = 2,
+
+    /// SO statistics tracked by CP/VGT including primitives written and storage needed.
+    StreamoutStats  = 3,
+
+    /// SO1 statistics tracked by CP/VGT including primitives written and storage needed.
+    StreamoutStats1 = 4,
+
+    /// SO2 statistics tracked by CP/VGT including primitives written and storage needed.
+    StreamoutStats2 = 5,
+
+    /// SO3 statistics tracked by CP/VGT including primitives written and storage needed.
+    StreamoutStats3 = 6,
+
+    /// One past the last query type above.
+    Count,
+};
+
+/// Bit flags selecting which pipeline statistics a pipeline stats query pool tracks.  Each statistic occupies one bit.
+enum QueryPipelineStatsFlags : uint32
+{
+    QueryPipelineStatsIaVertices    = (1u << 0),     ///< Input vertices.
+    QueryPipelineStatsIaPrimitives  = (1u << 1),     ///< Input primitives.
+    QueryPipelineStatsVsInvocations = (1u << 2),     ///< Vertex shader invocations.
+    QueryPipelineStatsGsInvocations = (1u << 3),     ///< Geometry shader invocations.
+    QueryPipelineStatsGsPrimitives  = (1u << 4),     ///< Geometry shader primitives.
+    QueryPipelineStatsCInvocations  = (1u << 5),     ///< Clipper invocations.
+    QueryPipelineStatsCPrimitives   = (1u << 6),     ///< Clipper primitives.
+    QueryPipelineStatsPsInvocations = (1u << 7),     ///< Pixel shader invocations.
+    QueryPipelineStatsHsInvocations = (1u << 8),     ///< Hull shader invocations.
+    QueryPipelineStatsDsInvocations = (1u << 9),     ///< Domain shader invocations.
+    QueryPipelineStatsCsInvocations = (1u << 10),    ///< Compute shader invocations.
+    QueryPipelineStatsTsInvocations = (1u << 11),    ///< Task shader invocations.
+    QueryPipelineStatsMsInvocations = (1u << 12),    ///< Mesh shader invocations.
+    QueryPipelineStatsMsPrimitives  = (1u << 13),    ///< Mesh shader primitives.
+    QueryPipelineStatsAll           = (1u << 14) - 1 ///< All of the above stats (0x3FFF).
+};
+
+/// Specifies properties for @ref IQueryPool creation.  Input structure to IDevice::CreateQueryPool().
+struct QueryPoolCreateInfo
+{
+    QueryPoolType queryPoolType;    ///< Type of query pool to create (e.g., occlusion vs. pipeline stats).
+    uint32        numSlots;         ///< Number of slots in the query pool.
+    uint32        enabledStats;     ///< An ORed mask of stats flags specific to the query pool type.
+                                    ///  @see QueryPipelineStatsFlags for PipelineStats query pools.
+    union
+    {
+        struct
+        {
+            /// If true, this query pool can have results retrieved using the CPU (using @ref IQueryPool::GetResults)
+            /// and can be reset using the CPU (using @ref IQueryPool::Reset).  Otherwise, the client must use command
+            /// buffers to perform these operations (using @ref ICmdBuffer::CmdResetQueryPool and
+            /// @ref ICmdBuffer::CmdResolveQuery).
+            uint32  enableCpuAccess :  1;
+            uint32  reserved        : 31;   ///< Reserved for future use.
+        };
+        uint32  u32All; ///< Flags packed together as a uint32.
+    } flags;            ///< Flags controlling QueryPool behavior.
+};
+
+/// Bit flags controlling operations that compute query results.
+enum QueryResultFlags : uint32
+{
+    /// Default to 32-bit results with no waiting.
+    QueryResultDefault          = 0,
+
+    /// Store all results as 64-bit values.
+    QueryResult64Bit            = (1u << 0),
+
+    /// Wait for the queries to finish when computing the results.
+    QueryResultWait             = (1u << 1),
+
+    /// If the results of a query are available at computation time a one will be written as a separate value after the
+    /// result value, if the results were not available a zero will be written.
+    QueryResultAvailability     = (1u << 2),
+
+    /// If the final result of a query would be unavailable, then return a result for that query between 0 and what the
+    /// final result would be.
+    QueryResultPartial          = (1u << 3),
+
+    /// Results are added to the values present in the destination, if availability data is enabled it will be ANDed
+    /// with the present availability data.
+    QueryResultAccumulate       = (1u << 4),
+
+    /// Prefer a shader resolve path over a command processor path.
+    QueryResultPreferShaderPath = (1u << 5),
+
+    /// Select only primitives storage needed in Streamout query results
+    QueryResultOnlyPrimNeeded   = (1u << 6),
+
+    /// Clients should NOT use it, for internal static_assert purpose only.  Equals 0x7F, i.e. every flag above.
+    QueryResultAll              = (1u << 7) - 1
+};
+
+/**
+ ***********************************************************************************************************************
+ * @interface IQueryPool
+ * @brief     Represents a set of queries that can be used to retrieve detailed info about the GPU's execution of a
+ *            particular range of a command buffer.
+ *
+ * All queries in a pool are the same type.  See @ref QueryPoolType for the pool categories declared by this header.
+ *
+ * @see IDevice::CreateQueryPool()
+ ***********************************************************************************************************************
+ */
+class IQueryPool : public IGpuMemoryBindable
+{
+public:
+    /// Retrieves query results from a query pool.
+    ///
+    /// Multiple consecutive query results can be retrieved with one call.
+    ///
+    /// @param [in]     flags      Flags that control the result data layout and how the results are retrieved.
+    /// @param [in]     queryType  Specifies what data the query slots must produce.
+    /// @param [in]     startQuery First query pool slot to retrieve data for.
+    /// @param [in]     queryCount Number of query pool slots to retrieve data for.
+    /// @param [in]     pMappedGpuAddr Specify the query buffer mapped address. If the parameter equals nullptr,
+    ///                                this method will use Map/UnMap to access the data.
+    /// @param [in,out] pDataSize  Input value specifies the available size in pData in bytes; output value reports the
+    ///                            number of bytes required to hold all result data.
+    /// @param [out]    pData      Location where the query results should be written. Can be null in order to query the
+    ///                            required size. The data returned depends on the query pool type and flags. All data
+    ///                            entries are either uint32 or uint64 integers. One or more type-specific entries will
+    ///                            be optionally followed by one entry for availability. The type-specific data is:<br>
+    ///                            + QueryPoolType::Occlusion: One entry to store the zPass count.
+    ///                            + QueryPoolType::PipelineStats: One entry per statistic enabled in the create info.
+    ///                              The stats will be written in the appropriate order for each PAL client.
+    /// @param [in]     stride     Stride in bytes between subsequent query result data or zero to request tightly
+    ///                            packed result data.
+    ///
+    /// @returns Success if query results were successfully returned in pData, or NotReady if any of the requested query
+    ///          slots does not yet have results available.  Otherwise, one of the following error codes may be
+    ///          returned:
+    ///          + ErrorInvalidValue if the range defined by startQuery and queryCount is not valid for this query pool.
+    ///          + ErrorGpuMemoryNotBound if the query pool requires GPU memory but none is bound.
+    ///          + ErrorInvalidMemorySize if pData is non-null and the value stored in pDataSize is too small.
+    virtual Result GetResults(
+        QueryResultFlags flags,
+        QueryType        queryType,
+        uint32           startQuery,
+        uint32           queryCount,
+        const void*      pMappedGpuAddr,
+        size_t*          pDataSize,
+        void*            pData,
+        size_t           stride) = 0;
+
+    /// Use CPU to reset the query pool slots.
+    ///
+    /// Supported for occlusion and video decode statistics query pools.
+    /// NOTE(review): QueryPoolType as declared in this header has no video decode statistics entry; the mention above
+    /// may be stale - confirm.
+    ///
+    /// @param [in]     startQuery     First query pool slot to reset.
+    /// @param [in]     queryCount     Number of query pool slots to reset.
+    /// @param [in]     pMappedCpuAddr Specify the query buffer mapped address. If the parameter equals nullptr,
+    ///                                this method will use Map/UnMap to access the data.
+    ///
+    /// @returns Success if the reset was successfully performed.
+    virtual Result Reset(
+        uint32  startQuery,
+        uint32  queryCount,
+        void*   pMappedCpuAddr) = 0;
+
+    /// Returns the distance, in bytes, between successive query slots in the bound GPU memory.
+    /// This method is only supported for @ref QueryPoolType::VideoDecodeStats
+    /// NOTE(review): QueryPoolType as declared in this header has no VideoDecodeStats enumerator, so the reference
+    /// above appears stale - confirm which pool types actually support this method.
+    ///
+    /// @returns the distance, in bytes, between successive query slots in the bound GPU memory.
+    virtual gpusize GetQuerySlotStride() const = 0;
+
+    /// Returns the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @returns Pointer to client data.
+    void* GetClientData() const
+    {
+        return m_pClientData;
+    }
+
+    /// Sets the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @param  [in]    pClientData     A pointer to arbitrary client data.
+    void SetClientData(
+        void* pClientData)
+    {
+        m_pClientData = pClientData;
+    }
+
+protected:
+    /// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
+    /// calling the proper create method.  The client data pointer starts out as nullptr.
+    IQueryPool() : m_pClientData(nullptr) {}
+
+    /// @internal Destructor.  Prevent use of delete operator on this interface.  Client must destroy objects by
+    /// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
+    /// object on their own.
+    virtual ~IQueryPool() { }
+
+private:
+    /// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
+    /// and set via SetClientData().
+    /// For non-top-layer objects, this will point to the layer above the current object.
+    void* m_pClientData;
+};
+
+} // Pal
@@ -0,0 +1,765 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palQueue.h
+ * @brief Defines the Platform Abstraction Library (PAL) IQueue interface and related types.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palDestroyable.h"
+#include "palEvent.h"
+
+namespace Pal
+{
+
+// Forward declarations.
+class ICmdBuffer;
+class IFence;
+class IGpuMemory;
+class IImage;
+class IPrivateScreen;
+class IQueueSemaphore;
+class IScreen;
+class ISwapChain;
+struct CmdBufInfo;
+struct GpuMemSubAllocInfo;
+struct GpuMemoryRef;
+struct DoppRef;
+
+enum class VirtualGpuMemAccessMode : uint32;
+
+/// Selects between windowed and fullscreen presentation.  The mode determines whether the present is performed via a
+/// BLT or flip.
+enum class PresentMode : uint32
+{
+    Unknown    = 0,
+    Windowed   = 1,
+    Fullscreen = 2,
+    Count
+};
+
+/// Enumerates the possible overrides for the flip interval.
+///
+/// NOTE(review): identifiers that begin with an underscore followed by an uppercase letter (such as _None) are
+/// reserved for the implementation by the C++ standard.  The name is presumably spelled this way to avoid clashing
+/// with a "None" macro - confirm, and consider a non-reserved spelling at the next interface version bump.
+enum class FlipIntervalOverride : uint32
+{
+    _None                 = 0, ///< No override.
+    Immediate             = 1, ///< Zero frames of flip latency.
+    ImmediateAllowTearing = 2, ///< Same as Immediate, but allows tearing (no vsync).
+    One                   = 3, ///< One frame of flip latency.
+    Two                   = 4, ///< Two frames of flip latency.
+    Three                 = 5, ///< Three frames of flip latency.
+    Four                  = 6, ///< Four frames of flip latency.
+};
+
+/// Bit flags describing which types of present modes are supported on a given queue.  Each flag occupies one bit so
+/// that several can be ORed together.
+enum PresentModeSupport : uint32
+{
+    SupportWindowedPresent          = (1u << 0),
+    SupportWindowedPriorBlitPresent = (1u << 1),
+    SupportFullscreenPresent        = (1u << 2),
+};
+
+/// Defines submit-time bottlenecks which PAL can potentially optimize.
+enum class SubmitOptMode : uint32
+{
+    Default           = 0, ///< PAL will enable optimizations when generally efficient.
+    Disabled          = 1, ///< Disable all optimizations that could be detrimental in special cases.
+    MinKernelSubmits  = 2, ///< Minimize the overhead of launching command buffers on the CPU and GPU.
+    MinGpuCmdOverhead = 3, ///< Minimize the overhead of reading command buffer commands on the GPU.
+    Count
+};
+
+/// Enumerates vcn instance affinity statuses
+enum MmAffinityStatus : uint32
+{
+    MmAffinityNotAllowed = 0, ///< The specific vcn instance can't be used.
+    MmAffinityAllowed    = 1  ///< The specific vcn instance can be used.
+};
+
+/// Union describes all vcn instance affinity status.
+///
+/// Each per-instance field is two bits wide.  NOTE(review): the fields presumably hold a MmAffinityStatus value -
+/// confirm against the code that fills this union in.
+union MmAffinity
+{
+    struct
+    {
+        uint32 vcn0Affinity : 2;  ///< Affinity for instance vcn0
+        uint32 vcn1Affinity : 2;  ///< Affinity for instance vcn1
+        uint32 reserved     : 28; ///< Reserved (all 0)
+    };
+    uint32 u32All;                ///< All of the bitfields above packed together as a uint32.
+};
+
+/// Structure describing dump information for a command buffer.
+struct CmdBufferDumpDesc
+{
+    EngineType    engineType;       ///< The engine type that this buffer is targeted for.
+    QueueType     queueType;        ///< The type of queue that this buffer is being created on.
+    SubEngineType subEngineType;    ///< The ID of which sub-engine that this buffer is made for.
+
+    uint32        cmdBufferIdx;     ///< The index into the SubmitInfo ppCmdBuffers array that this
+                                    ///  command buffer dump came from.
+    union
+    {
+        struct
+        {
+            uint8 isPreamble  : 1;  ///< Set if the buffer is an internal preamble command buffer.
+            uint8 isPostamble : 1;  ///< Set if the buffer is an internal postamble command buffer.
+            uint8 reserved    : 6;  ///< Reserved for future use.
+        };
+        uint8 u32All;               ///< Flags packed as 8-bit uint.  Despite the "u32" in its name, this member is
+                                    ///  only 8 bits wide.
+    } flags;                        ///< Flags describing the command buffer being dumped.
+
+};
+
+/// Structure describing a command buffer chunk for use while dumping command buffers.  An array of these is passed
+/// to @ref CmdDumpCallback through its pChunks argument.
+struct CmdBufferChunkDumpDesc
+{
+    uint32       id;        ///< ID (number) of this command chunk within the command buffer.
+    const void*  pCommands; ///< Pointer to the command data.
+    size_t       size;      ///< Size, in bytes, of the valid data pointed to by pCommands.
+};
+
+/// Signature of the callback invoked to dump command buffers.
+///
+/// @param [in] cmdBufferDesc   Description of the command buffer.
+/// @param [in] pChunks         Pointer to an array of command buffer chunk descriptions.
+/// @param [in] numChunks       The number of chunks pointed to in pChunks.
+/// @param [in] pUserData       Client-provided data pointer handed back to the callback (see pUserData in
+///                             @ref MultiSubmitInfo).
+using CmdDumpCallback = void (PAL_STDCALL*)(
+    const CmdBufferDumpDesc&      cmdBufferDesc,
+    const CmdBufferChunkDumpDesc* pChunks,
+    uint32                        numChunks,
+    void*                         pUserData);
+
+/// Specifies properties for @ref IQueue creation.  Input structure to IDevice::CreateQueue().
+struct QueueCreateInfo
+{
+    QueueType     queueType;     ///< Selects which type of queue to create.
+    EngineType    engineType;    ///< Selects which type of engine to create.
+    uint32        engineIndex;   ///< Which instance of the specified engine type to query. For example, there
+                                 ///  can be multiple compute queues, so this parameter distinguishes between them.
+    SubmitOptMode submitOptMode; ///< A hint telling PAL which submit-time bottlenecks should be optimized, if any.
+    QueuePriority priority;      ///< A hint telling PAL to create the queue with the proper priority.
+                                 ///  It is only supported if supportQueuePriority is set in DeviceProperties.
+                                 ///  On Linux, if we don't have root privilege, creation with above-Medium
+                                 ///  priority will fail. The client should take the corresponding action, like
+                                 ///  retrying with a lower priority, if necessary.
+    struct
+    {
+        uint32 aqlQueue                        :  1; ///< Compute queue will process AQL packets and kernels
+        uint32 windowedPriorBlit               :  1; ///< All windowed presents on this queue are notifications
+                                                     ///  that the client has manually done a blit present
+        uint32 tmzOnly                         :  1; ///< This queue allows only TMZ submissions. Required for
+                                                     ///  compute TMZ submits.
+
+#if PAL_AMDGPU_BUILD
+        uint32 enableGpuMemoryPriorities       :  1; ///< Enables support for GPU memory priorities on this Queue.
+                                                     ///  This is optional because enabling the feature requires
+                                                     ///  a small amount of memory overhead per-Queue for
+                                                     ///  bookkeeping purposes.
+#else
+        uint32 placeholder2                    :  1; ///< Reserved field. Set to 0.
+#endif
+        uint32 dispatchTunneling               :  1; ///< This queue uses compute dispatch tunneling.
+
+        uint32 forceWaitIdleOnRingResize       :  1; ///< This queue needs to wait for idle before resizing its
+                                                     ///  RingSet.  This is intended as a workaround for
+                                                     ///  misbehaving applications.
+#if defined(_WIN32)
+        uint32 nullRendering                   :  1; ///< Setting this bit makes this queue behave like IfhModeKmd.
+#else
+        uint32 placeholder3                    :  1; ///< Reserved field. Set to 0.
+#endif
+        uint32 reserved                        : 25; ///< Reserved for future use.
+    };
+
+    uint32 numReservedCu;           ///< The number of reserved compute units for RT CU queue
+
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 914
+    uint32 persistentCeRamOffset;   ///< Byte offset to the beginning of the region of CE RAM which this Queue should
+                                    ///  preserve across consecutive submissions.  Must be a multiple of 32.  It is an
+                                    ///  error to specify a nonzero value here if the Device does not support
+                                    ///  @ref supportPersistentCeRam for the Engine this Queue will attach to.
+    uint32 persistentCeRamSize;     ///< Amount of CE RAM space which this Queue should preserve across consecutive
+                                    ///  submissions.  Units are in DWORDs, and this must be a multiple of 8.  It is an
+                                    ///  error to specify a nonzero value here if the Device does not support
+                                    ///  @ref supportPersistentCeRam for the Engine this Queue will attach to.
+#endif
+
+    uintptr_t aqlPacketList;        ///< Location of the HIP runtime's info about this queue
+};
+
+/// Specifies the portion of @ref SubmitInfo that is specific to each sub-queue in a multi-queue object (@see
+/// IDevice::CreateMultiQueue).  Effectively, this enables specifying a different set of command buffers for each
+/// queue that makes up a gang submission to a multi-queue object.
+struct PerSubQueueSubmitInfo
+{
+    uint32            cmdBufferCount;   ///< Number of command buffers to be submitted (can be 0 if this submit doesn't
+                                        ///  involve work for the relevant queue).
+    ICmdBuffer*const* ppCmdBuffers;     ///< Array of cmdBufferCount command buffers to be submitted.  Command buffers
+                                        ///  that are part of a ganged submit must guarantee the conditions required
+                                        ///  for the optimizeExclusiveSubmit flag.
+    const CmdBufInfo* pCmdBufInfoList;  ///< Null, or an array of cmdBufferCount structs providing additional
+                                        ///  info about the command buffers being submitted.  If non-null, any
+                                        ///  element whose isValid flag is false is ignored.
+};
+
+/// Specifies all information needed to execute a set of command buffers.  Input structure to IQueue::Submit().
+///
+/// Some members of this structure are not supported on all platforms.  The client must check the appropriate properties
+/// structures to determine if the corresponding features are supported:
+/// + pGpuMemoryRefs:    Support is indicated by supportPerSubmitMemRefs in @ref DeviceProperties.
+/// + ppBlockIfFlipping: Support is indicated by supportBlockIfFlipping in @ref PlatformProperties.  If it is supported,
+///                      the client must not specify a blockIfFlippingCount greater than MaxBlockIfFlippingCount.
+///
+/// @note If this queue is running in physical submission mode (due to hardware restrictions), the gpuMemRefCount and
+///       pGpuMemoryRefs members of this structure are ignored because the command buffers themselves contain their
+///       own GPU memory reference lists.
+struct MultiSubmitInfo
+{
+    const PerSubQueueSubmitInfo* pPerSubQueueInfo;///< Specifies per-subqueue information for the submit.  Typically
+                                                  ///  this is a pointer to a single entry specifying the command
+                                                  ///  buffers to be submitted on this queue.  For gang submission on
+                                                  ///  a multi-queue, this should be an array with one entry per
+                                                  ///  sub-queue.  The array size must be less than or equal to the
+                                                  ///  queueCount specified when the multi-queue was created and
+                                                  ///  the workload specified in each entry will be assigned to the
+                                                  ///  corresponding sub-queue.  It is valid to have a cmdBufferCount
+                                                  ///  of 0 for sub-queues without work.  Can be null if
+                                                  ///  perSubQueueInfoCount is 0.
+    uint32                  perSubQueueInfoCount; ///< Number of PerSubQueueSubmitInfo entries to be submitted.  Can be
+                                                  ///  zero if there is no work to submit.
+    uint32                  gpuMemRefCount;       ///< Number of GPU memory references for this submit.
+    const GpuMemoryRef*     pGpuMemoryRefs;       ///< Array of gpuMemRefCount GPU memory references.  Can be null if
+                                                  ///  gpuMemRefCount is zero.  The GPU memory objects will be made
+                                                  ///  resident for the duration of this submit.
+    uint32                  doppRefCount;         ///< Number of DOPP desktop texture references for this submit.
+    const DoppRef*          pDoppRefs;            ///< Array of doppRefCount DOPP texture references.  Can be null if
+                                                  ///  doppRefCount is zero.
+    uint32                  externPhysMemCount;   ///< Number of entries in ppExternPhysMem.
+    const IGpuMemory**      ppExternPhysMem;      ///< Array of external physical memory allocations to be initialized
+                                                  ///  as part of this submit.  The first submit that references a
+                                                  ///  particular external physical memory allocation must include
+                                                  ///  that allocation in this list.  Subsequent submits that reference
+                                                  ///  the same allocation should not include it in this list, as it
+                                                  ///  would trigger redundant GPU page table initialization.
+    uint32                  blockIfFlippingCount; ///< Number of GPU memory objects to protect when flipped.
+    const IGpuMemory*const* ppBlockIfFlipping;    ///< Array of blockIfFlippingCount GPU memory objects.  Can be null if
+                                                  ///  blockIfFlippingCount is zero.  The command buffers will not be
+                                                  ///  scheduled to the GPU while a fullscreen (flip) present is queued
+                                                  ///  for any of these GPU memory allocations.
+    uint32                  fenceCount;           ///< Number of fence objects to be signaled once the last command buffer
+                                                  ///  in this submission completes execution.
+    IFence**                ppFences;             ///< Array of fenceCount fence objects.  Can be null if fenceCount is
+                                                  ///  zero.
+    CmdDumpCallback         pfnCmdDumpCb;         ///< Null, or a callback function to handle the dumping of the
+                                                  ///  command buffers used in this submit.
+    void*                   pUserData;            ///< Client provided data to be passed to callback.
+
+    uint32                  stackSizeInDwords;    ///< 0, or the max of stack frame size for indirect shaders of the
+                                                  ///  pipelines referenced in the command buffers of this submission.
+                                                  ///  The size is per native thread, so the client will have to
+                                                  ///  multiply by 2 if a Wave64 shader that needs scratch is used.
+                                                  ///  Note that the size will not shrink for the lifetime of the queue
+                                                  ///  once it is grown and only affects compute scratch ring.
+    const IGpuMemory*       pFreeMuxMemory;       ///< The gpu memory object of the private flip primary surface for the
+                                                  ///  FreeMux feature.
+};
+
+/// SubmitInfo is an alias of @ref MultiSubmitInfo; the two names refer to the same structure.
+using SubmitInfo = MultiSubmitInfo;
+
+/// Upper bound on blockIfFlippingCount in @ref SubmitInfo: the client must not specify a value greater than this.
+constexpr uint32 MaxBlockIfFlippingCount = 16;
+
+/// Specifies properties for the presentation of an image to the screen.  Input structure to IQueue::PresentDirect().
+struct PresentDirectInfo
+{
+    union
+    {
+        struct
+        {
+            uint32 fullscreenDoNotWait :  1; ///< Fail the present immediately if the present queue is full.
+            uint32 srcIsTypedBuffer    :  1; ///< True if the source is a typed buffer instead of an image.
+            uint32 dstIsTypedBuffer    :  1; ///< True if the destination is a typed buffer instead of an image.
+            uint32 notifyOnly          :  1; ///< Indicates that a present occurred outside of PAL. PAL must not
+                                             ///  execute a present if this is true but may update internal
+                                             ///  tracking state.
+            uint32 reserved            : 28; ///< Reserved for future use.
+        };
+        uint32 u32All;       ///< Flags packed as 32-bit uint.
+    } flags;                 ///< Present flags.
+
+    OsWindowHandle hWindow;         ///< Native OS window handle that this image should be presented to.
+    PresentMode    presentMode;     ///< Chooses between windowed and fullscreen present.
+    uint32         presentInterval; ///< Must be an integer from 0 to 4.  0 indicates that the present should
+                                    ///  occur immediately (may tear), and 1-4 indicates the present should
+                                    ///  occur after 1 to 4 vertical syncs.  Only valid for fullscreen presents.
+    union
+    {
+        IImage*        pSrcImage;       ///< Optional: The image to be presented.  If null, the present will not
+                                        ///  occur but PAL may still call into the OS on certain platforms that
+                                        ///  expect it.
+        IGpuMemory*    pSrcTypedBuffer; ///< The typed buffer to be presented.  If null, the present will not occur
+                                        ///  but PAL may still call into the OS on certain platforms that expect it.
+    };
+    union
+    {
+        IImage*        pDstImage;       ///< Optional: copy from the source image to this image.  If null, PAL will
+                                        ///  automatically copy into the appropriate platform-specific destination.
+                                        ///  This is only supported for windowed mode presents.
+        IGpuMemory*    pDstTypedBuffer; ///< Typed-buffer alternative to pDstImage (the two share storage).
+                                        ///  NOTE(review): presumably used when flags.dstIsTypedBuffer is set; the
+                                        ///  previous text here duplicated the pSrcTypedBuffer description, so the
+                                        ///  behavior when this is null should be confirmed.
+    };
+
+};
+
+/// Media stream counter (MSC) information.
+struct MscInfo
+{
+    uint64 targetMsc;                  ///< If the current MSC is less than <targetMsc>, the buffer swap will occur
+                                       ///  when the MSC value becomes equal to <targetMsc>.
+    uint64 divisor;                    ///< Divisor.  If the current MSC is greater than or equal to <targetMsc>, the
+                                       ///  buffer swap will occur the next time the MSC value is incremented to a
+                                       ///  value such that MSC % <divisor> = <remainder>.
+    uint64 remainder;                  ///< Remainder compared against MSC % <divisor> (see divisor).
+};
+
+/// Specifies properties for the presentation of an image to the screen.  Input structure to IQueue::PresentSwapChain().
+struct PresentSwapChainInfo
+{
+    PresentMode presentMode;    ///< Chooses between windowed and fullscreen present.
+    IImage*     pSrcImage;      ///< The image to be presented.
+    ISwapChain* pSwapChain;     ///< The swap chain associated with the source image.
+    uint32      imageIndex;     ///< The index of the source image within the swap chain. Ownership of this image
+                                ///  index will be released back to the swap chain if this call succeeds.
+    uint32      rectangleCount; ///< Number of valid rectangles in the pRectangles array.
+    uint32      syncInterval;   ///< Applicable only when syncIntervalOverride is set.
+                                ///  0 - The presentation occurs immediately, there is no synchronization.
+                                ///  1 through 4 - Synchronize presentation after the nth vertical blank.
+    const Rect* pRectangles;    ///< Array of rectangles defining the regions which will be updated.
+    uint64      presentId;      ///< PresentId functions as an identifier for present operations on a swapchain.
+                                ///  If this PresentId is non-zero, then the application can later use this value
+                                ///  to refer to that image presentation. A value of zero indicates that this
+                                ///  presentation has no associated presentId. A non-zero presentId must be greater
+                                ///  than any non-zero presentId passed previously by the application for the same
+                                ///  swapchain.
+    union
+    {
+        struct
+        {
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 941
+            uint32 notifyOnly           :  1; ///< True if it is a notify-only present
+#else
+            uint32 notifyOnly           :  1; ///< Indicates that a present occurred outside of PAL. PAL must not
+                                              ///  execute a present if this is true but may update internal
+                                              ///  tracking state.
+#endif
+            uint32 isTemporaryMono      :  1; ///< True if WS Stereo is enabled, but 3D display mode turned off.
+            uint32 turboSyncEnabled     :  1; ///< Whether TurboSync is enabled.
+            uint32 syncIntervalOverride :  1; ///< Override the default syncInterval with the value in syncInterval.
+                                              ///  Supported only on Windows wsiPlatforms.
+            uint32 reserved             : 28; ///< Reserved for future use.
+        };
+        uint32 u32All; ///< Flags packed as 32-bit uint.
+    } flags;           ///< PresentSwapChainInfo flags.
+#if PAL_AMDGPU_BUILD
+    MscInfo mscInfo;   ///< Media stream counter information.  Only present in PAL_AMDGPU_BUILD builds.
+#endif
+};
+
+/// Specifies a mapping from a range of pages in a virtual GPU memory object to a range of pages in a real GPU memory
+/// object.  Input to IQueue::RemapVirtualMemoryPages().
+///
+/// When mapping pages of a virtual GPU memory object to a range of pages in a real GPU memory object on a remote GPU,
+/// the client must point pRealGpuMem at a peer GPU memory object created on the input queue's device instead of the
+/// actual real GPU memory object created on the remote device.  This is required for two reasons:
+///   1. PAL can only view remote GPU memory using peer objects.
+///   2. PAL enforces a separation of state between different IDevice object families.
+///
+/// Alignment requirements:
+///   - virtualStartOffset and size must be aligned to the virtualMemPageSize member of @ref DeviceProperties.
+///   - realStartOffset must be aligned to the realMemAllocGranularity member of @ref DeviceProperties.
+struct VirtualMemoryRemapRange
+{
+    IGpuMemory*             pVirtualGpuMem;     ///< Virtual GPU memory object whose mapping is being updated.
+    gpusize                 virtualStartOffset; ///< Start of the page range to be updated, in bytes.
+    IGpuMemory*             pRealGpuMem;        ///< Real GPU memory object the virtual range should point at.
+    gpusize                 realStartOffset;    ///< Start of the page range in the real GPU memory object, in bytes.
+    gpusize                 size;               ///< Size of the mapping range, in bytes.
+    VirtualGpuMemAccessMode virtualAccessMode;  ///< Access mode for virtual GPU memory's unmapped pages.
+                                                ///  This parameter is ignored on some platforms.
+};
+
+/// Specifies a set of page mappings to copy between virtual GPU memory objects.  The source and destination can be
+/// the same memory object and the source and destination regions may overlap.  Input to
+/// IQueue::CopyVirtualMemoryPageMappings().
+///
+/// srcStartOffset, dstStartOffset, and size must be aligned to the virtualMemPageSize member of @ref DeviceProperties.
+struct VirtualMemoryCopyPageMappingsRange
+{
+    IGpuMemory* pSrcGpuMem;     ///< Virtual GPU memory object whose mapping is being copied from.
+    gpusize     srcStartOffset; ///< Start of the copy source range, in bytes.
+    IGpuMemory* pDstGpuMem;     ///< Virtual GPU memory object whose mapping is being copied to.
+    gpusize     dstStartOffset; ///< Start of the copy destination range, in bytes.
+    gpusize     size;           ///< Size of the mapping range, in bytes.
+};
+
+/// Specifies kernel-level information about a context.
+struct KernelContextInfo
+{
+    union
+    {
+        struct
+        {
+            uint32 hasDebugVmid        :  1; ///< True if the context has acquired the debug vmid.
+            uint32 hasHighPriorityVmid :  1; ///< True if the context has acquired the high priority vmid.
+            uint32 reserved            : 30; ///< Reserved for future use.
+        };
+        uint32 u32All;                       ///< Flags packed as 32-bit uint.
+    } flags;                                 ///< Context flags.
+
+    uint64 contextIdentifier;                ///< Kernel scheduler context identifier.
+};
+
+/**
+ ***********************************************************************************************************************
+ * @interface IQueue
+ * @brief     Represents a queue of work for a particular GPU engine on a device.
+ *
+ * An IQueue object is a virtual representation of a hardware engine on the device. Multiple IQueue objects can be
+ * created and have work submitted on them in parallel. Work is submitted to a queue through @ref ICmdBuffer objects,
+ * and work can be synchronized between multiple queues using @ref IQueueSemaphore objects.
+ *
+ * @see IDevice::GetQueue()
+ ***********************************************************************************************************************
+ */
+class IQueue : public IDestroyable
+{
+public:
+    /// Submits a group of root command buffers for execution on this queue.
+    ///
+    /// @param [in] submitInfo Specifies all command buffers to execute along with other residency and synchronization
+    ///                        information.  See @ref SubmitInfo for additional, important documentation.
+    ///
+    /// @returns Success if the command buffer was successfully submitted.  Otherwise, one of the following errors may
+    ///          be returned:
+    ///          + ErrorInvalidPointer if:
+    ///              - any of the array inputs are null when their counts are non-zero.
+    ///              - any members of non-null pointer arrays are null.
+    ///          + ErrorTooManyMemoryReferences if the total number of memory references (device/queue global and
+    ///            per-command buffer) is too large.
+    ///          + ErrorInvalidValue if blockIfFlippingCount is too large.
+    ///          + ErrorIncompleteCommandBuffer if any of the submitted command buffers are not properly constructed.
+    ///          + ErrorIncompatibleQueue if any submitted command buffer does not match this queue's type (e.g.,
+    ///            universal, graphics, DMA).
+    virtual Result Submit(
+        const MultiSubmitInfo& submitInfo) = 0;
+
+    /// Waits for all previous submissions on this queue to complete before control is returned to the caller.
+    ///
+    /// @returns Success if the wait for submissions completed.  Otherwise, an error indicates the reason for the
+    ///          unsuccessful wait, for example due to a lost device.
+    virtual Result WaitIdle() = 0;
+
+    /// Inserts a semaphore signal into the GPU queue.  The semaphore will be signaled once all previously submitted
+    /// work on this queue has completed.
+    ///
+    /// @param [in] pQueueSemaphore     Semaphore to signal.
+    /// @param [in] value               Timeline semaphore point value to signal; ignored for non-timeline semaphores.
+    ///                                 Defaults to 0.
+    ///
+    /// @returns Success if the semaphore signal was successfully queued.  Otherwise, one of the following errors may be
+    ///          returned:
+    ///          + ErrorUnknown if the OS scheduler rejects the signal for unknown reasons.
+    virtual Result SignalQueueSemaphore(
+        IQueueSemaphore* pQueueSemaphore, uint64 value = 0) = 0;
+
+    /// Inserts a semaphore wait into the GPU queue.  The queue will be stalled until the specified semaphore is
+    /// signaled.
+    ///
+    /// @param [in] pQueueSemaphore     Semaphore to wait on.
+    /// @param [in] value               Timeline semaphore point value to wait on; ignored for non-timeline semaphores.
+    ///                                 Defaults to 0.
+    ///
+    /// @returns Success if the semaphore wait was successfully queued.  Otherwise, one of the following errors may be
+    ///          returned:
+    ///          + ErrorUnknown if the OS scheduler rejects the wait for unknown reasons.
+    virtual Result WaitQueueSemaphore(
+        IQueueSemaphore* pQueueSemaphore, uint64 value = 0) = 0;
+
+#if PAL_KMT_BUILD
+    /// Acquires the keyed mutex of a shared GPU memory object (CPU sync) and then waits for the synchronization object
+    /// of the shared GPU memory object (GPU sync based on fence). Note that the shared GPU memory object has to be
+    /// a D3d11 resource created with the (D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX | D3D11_RESOURCE_MISC_SHARED_NTHANDLE)
+    /// misc flag.
+    ///
+    /// @param [in] pGpuMemory  Shared GPU memory object on which keyed mutex and synchronization object are bound.
+    /// @param [in] key         Key of keyed mutex to be acquired.
+    /// @param [in] timeout     Timeout interval for keyed mutex acquiring, in milliseconds.
+    ///
+    /// @returns Success if the keyed mutex has been successfully acquired and the wait for the synchronization object
+    ///          has been successfully scheduled. Otherwise, one of the following errors may be returned:
+    ///          + ErrorUnknown if either the keyed mutex has not been successfully acquired or the wait for the
+    ///            synchronization object has not been successfully scheduled.
+    virtual Result KeyedMutexAcquireSync(
+        IGpuMemory*               pGpuMemory,
+        uint64                    key,
+        std::chrono::milliseconds timeout) = 0;
+
+    /// Signals the synchronization object of a shared GPU memory object with a bumped fence value and then releases
+    /// the keyed mutex of the shared GPU memory object. Note that the shared GPU memory object has to be a D3d11
+    /// resource created with the (D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX | D3D11_RESOURCE_MISC_SHARED_NTHANDLE) misc
+    /// flag.
+    ///
+    /// @param [in] pGpuMemory  Shared GPU memory object on which keyed mutex and synchronization object are
+    ///                         bound.
+    /// @param [in] key         Key of keyed mutex to be released.
+    ///
+    /// @returns Success if the signal of the synchronization object has been successfully scheduled and the keyed
+    ///          mutex has been successfully released. Otherwise, one of the following errors may be returned:
+    ///          + ErrorUnknown if either the signal of the synchronization object has not been successfully scheduled
+    ///            or the keyed mutex has not been successfully released.
+    virtual Result KeyedMutexReleaseSync(
+        IGpuMemory* pGpuMemory,
+        uint64      key) = 0;
+#endif
+
+    /// Passes application information to the KMD for application-specific power optimizations.
+    /// The power configuration is restored to its default when all application queues are destroyed.
+    ///
+    /// @param [in]  pFileName  Application executable name
+    /// @param [in]  pPathName  Path to the application
+    ///
+    /// @returns Success if the information is passed successfully.  Otherwise, one of the following errors may be
+    ///          returned:
+    ///          + Unsupported if this function is not available on this OS or if the queue context is null.
+    ///          + ErrorUnknown if an unexpected internal error occurs.
+    virtual Result UpdateAppPowerProfile(
+        const wchar_t* pFileName,
+        const wchar_t* pPathName) = 0;
+
+    /// Queues the specified image for presentation on the screen.  This function directly queues the presentation
+    /// request based on the input parameters without special synchronization considerations like a swap chain present.
+    /// All previous work done on this queue will complete before the image is displayed.
+    ///
+    /// This function should never be called with a swap chain presentable image because it won't release ownership of
+    /// the presentable image index, eventually deadlocking the swap chain.
+    ///
+    /// Overall support for direct presents can be queried at platform creation time via supportNonSwapChainPresents
+    /// in @ref PlatformProperties.  Support for particular present modes is specified via supportedDirectPresentModes
+    /// in @ref DeviceProperties.
+    ///
+    /// @note  Any images specified in presentInfo must be made resident before calling this function.
+    ///
+    /// @param [in] presentInfo Specifies the source image and destination window for the present as well as other
+    ///                         properties.  See @ref PresentDirectInfo.
+    ///
+    /// @returns Success if the present was successfully queued.  Otherwise, one of the following errors may be
+    ///          returned:
+    ///          + ErrorInvalidValue if the flip interval is invalid.
+    ///          + ErrorInvalidValue if the present mode doesn't match the capabilities of the image.
+    ///          + ErrorInvalidFlags if the present flags don't match the capabilities of the image.
+    virtual Result PresentDirect(
+        const PresentDirectInfo& presentInfo) = 0;
+
+    /// Queues the specified image for presentation on the screen.  This function uses the provided swap chain to
+    /// determine exactly how the image should be presented (e.g., can the user see tearing).  See @ref ISwapChain for
+    /// more information on swap chain presentation.  All previous work done on this queue will complete before the
+    /// image is displayed, but future work may execute before the present is completed because swap chain present
+    /// execution may be asynchronous to the queue that initiated the present.
+    ///
+    /// Assuming the presentInfo is valid, this function will always release ownership of the presentable image index
+    /// even if PAL encounters an error while executing the present.
+    ///
+    /// Queue support for swap chain presents is specified via supportsSwapChainPresents in @ref DeviceProperties.
+    /// Support for particular PresentModes is queried per SwapChainMode via IDevice::GetSwapChainInfo().
+    ///
+    /// @note  The source image specified in presentInfo must be made resident before calling this function.
+    ///
+    /// @param [in] presentInfo Specifies the source image, swap chain, and basic presentation information.  See
+    ///                         @ref PresentSwapChainInfo.
+    ///
+    /// @returns Success if the present was successfully queued.  Otherwise, one of the following errors may be
+    ///          returned:
+    ///          + ErrorInvalidPointer if the source image or swap chain are null.
+    ///          + ErrorInvalidValue if the present mode doesn't match the capabilities of the image or if the image
+    ///                              index isn't valid within the swap chain.
+    virtual Result PresentSwapChain(
+        const PresentSwapChainInfo& presentInfo) = 0;
+
+    /// Inserts a delay of the specified duration before any further commands are processed on this queue.
+    ///
+    /// Only available on timer queues.  Useful in conjunction with queue semaphores to implement frame pacing.
+    ///
+    /// @param [in] delay Time, in milliseconds, to wait before processing more commands on this queue.
+    ///
+    /// @returns Success if the delay was successfully queued.  Otherwise, one of the following errors may be returned:
+    ///          + ErrorInvalidValue if delay is less than 0.
+    virtual Result Delay(
+        Util::fmilliseconds delay) = 0;
+
+    /// Inserts a delay of a specified amount of time on this queue after a vsync on a private display object.
+    ///
+    /// Only available on timer queues.  Useful in conjunction with queue semaphores to implement pacing of GPU and CPU
+    /// operations for rendering and presentation in VR as this allows GPU commands of the next frame to be sent early
+    /// but blocks GPU execution until after vsync.
+    ///
+    /// @param [in] delay   Time, in microseconds, to delay before processing more commands on this queue.
+    /// @param [in] pScreen The private screen object whose vsync the delay waits on.
+    ///
+    /// @returns Success if the delay was successfully queued.  Otherwise, one of the following errors may be returned:
+    ///          + ErrorInvalidValue if delay is less than 0.
+    virtual Result DelayAfterVsync(
+        Util::fmicroseconds   delay,
+        const IPrivateScreen* pScreen) = 0;
+
+    /// Updates page mappings for virtual GPU memory allocations.
+    ///
+    /// @param [in] rangeCount  Number of ranges to remap (i.e., size of the pRanges array).
+    /// @param [in] pRanges     Defines the set of remappings from virtual GPU memory object pages to real GPU
+    ///                         memory object pages.
+    /// @param [in] doNotWait   If true, then this paging operation will be executed on the Queue immediately, without
+    ///                         waiting for any previous rendering to finish first. On platforms that don't support
+    ///                         this, the flag will be ignored.
+    /// @param [in] pFence      Optional. Pointer to an IFence, which will be signaled after the VA remapping.
+    ///
+    /// @returns Success if the remappings were executed successfully.  It is assumed that the following conditions are
+    ///          met for the input to this function:
+    ///          + rangeCount is not 0.
+    ///          + The page ranges for all members of pRanges are valid.
+    ///          + pRanges is not null.
+    ///          + pVirtualGpuMem is not null for any member of pRanges.
+    ///          + pRanges does not specify a real GPU memory object as a virtual GPU memory object or vice versa.
+    virtual Result RemapVirtualMemoryPages(
+        uint32                         rangeCount,
+        const VirtualMemoryRemapRange* pRanges,
+        bool                           doNotWait,
+        IFence*                        pFence) = 0;
+
+    /// Copies page mappings from one virtual GPU memory object to another.
+    ///
+    /// @param [in] rangeCount  Number of ranges to copy (i.e., size of the pRanges array).
+    /// @param [in] pRanges     Defines the set of page mappings to copy between virtual GPU memory objects.
+    /// @param [in] doNotWait   If true, then this paging operation will be executed on the Queue immediately, without
+    ///                         waiting for any previous rendering to finish first. On platforms that don't support
+    ///                         this, the flag will be ignored.
+    ///
+    /// @returns Success if the mappings were copied successfully.  It is assumed that the following conditions are
+    ///          met for the input to this function:
+    ///          + rangeCount is not 0.
+    ///          + The page ranges for all members of pRanges are valid.
+    ///          + pRanges is not null.
+    ///          + pSrcGpuMem or pDstGpuMem is not null for any member of pRanges.
+    ///          + pRanges does not specify a real GPU memory object as source or destination.
+    virtual Result CopyVirtualMemoryPageMappings(
+        uint32                                    rangeCount,
+        const VirtualMemoryCopyPageMappingsRange* pRanges,
+        bool                                      doNotWait) = 0;
+
+    /// Associates the provided Fence object with the last submission on this queue object. The Fence can be used via
+    /// GetStatus() to get the status of the last Submit, however no event will be created/set for the Fence so
+    /// WaitForFences() should NOT be called on the fence after this association.
+    ///
+    /// @see IFence::GetStatus()
+    /// @see IFence::WaitForFences()
+    ///
+    /// @param [in] pFence   Fence object to be associated with the last Submit on this queue.
+    ///
+    /// @returns Success if the association was successful. ErrorUnavailable will be returned if there has not yet been
+    ///          a Submit on this queue.
+    virtual Result AssociateFenceWithLastSubmit(
+        IFence* pFence) = 0;
+
+    /// Sets the execution priority for this queue.  This allows elevating the execution priority of submitted command
+    /// buffers, but it has no effect on command buffers that have already been submitted for execution.  Elevating
+    /// the queue priority to medium or high allows temporarily stalling a low priority queue's execution and executing
+    /// its work as soon as the low priority queue starts draining.
+    ///
+    /// @param [in] priority The priority level of the queue.
+    virtual void SetExecutionPriority(
+        QueuePriority priority) = 0;
+
+    /// Returns a list of GPU memory allocations used by this queue.
+    ///
+    /// @param [in,out] pNumEntries    Input value specifies the available size in pAllocInfoList; output value
+    ///                                reports the number of GPU memory allocations.
+    /// @param [out]    pAllocInfoList If pAllocInfoList=nullptr, then pNumEntries is ignored on input.  On output it
+    ///                                will reflect the number of allocations that make up this queue.  If
+    ///                                pAllocInfoList!=nullptr, then on input pNumEntries is assumed to be the number
+    ///                                of entries in the pAllocInfoList array.  On output, pNumEntries reflects the
+    ///                                number of entries in pAllocInfoList that are valid.
+    /// @returns Success if the allocation info was successfully written to the buffer.  Otherwise, one of:
+    ///          + ErrorInvalidValue if the caller provides a buffer size that is different from the size needed.
+    ///          + ErrorInvalidPointer if pNumEntries is nullptr.
+    virtual Result QueryAllocationInfo(
+        size_t*                    pNumEntries,
+        GpuMemSubAllocInfo* const  pAllocInfoList) = 0;
+
+    /// @returns The QueueType of this queue.
+    virtual QueueType Type() const = 0;
+
+    /// @returns The EngineType of this queue.
+    virtual EngineType GetEngineType() const = 0;
+
+    /// Queries the kernel context info associated with this queue and copies it into pKernelContextInfo.
+    ///
+    /// Only supported on Windows platforms.
+    ///
+    /// @param [out] pKernelContextInfo Pointer to a KernelContextInfo struct to copy the information into.
+    /// @returns Success if the information is successfully copied into the output struct.  Otherwise, one of:
+    ///          + ErrorInvalidPointer if pKernelContextInfo is nullptr.
+    ///          + ErrorUnavailable if kernel context information is not available on the current platform.
+    virtual Result QueryKernelContextInfo(KernelContextInfo* pKernelContextInfo) const = 0;
+
+    /// Returns the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @returns Pointer to client data; nullptr if SetClientData() has not been called on this object.
+    void* GetClientData() const
+    {
+        return m_pClientData;
+    }
+
+    /// Sets the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @param  [in]    pClientData     A pointer to arbitrary client data; stored as-is for GetClientData().
+    void SetClientData(
+        void* pClientData)
+    {
+        m_pClientData = pClientData;
+    }
+
+protected:
+    /// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
+    /// calling the proper create method.  The client data pointer starts out as nullptr.
+    IQueue() : m_pClientData(nullptr) {}
+
+    /// @internal Destructor.  Protected to prevent use of the delete operator on this interface.  Queues will be
+    /// destroyed when the associated device is destroyed.
+    virtual ~IQueue() { }
+
+private:
+    /// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
+    /// and set via SetClientData().
+    /// For non-top-layer objects, this will point to the layer above the current object.
+    void* m_pClientData;
+};
+
+} // Pal
@@ -0,0 +1,275 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palQueueSemaphore.h
+ * @brief Defines the Platform Abstraction Library (PAL) IQueueSemaphore interface and related types.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palDestroyable.h"
+#include <chrono>
+
+#if defined(_WIN32)
+struct _SECURITY_ATTRIBUTES;
+#endif
+
+namespace Pal
+{
+
+// Forward declarations.
+class IQueueSemaphore;
+
+/// Specifies properties for @ref IQueueSemaphore creation.  Input structure to IDevice::CreateQueueSemaphore().
+struct QueueSemaphoreCreateInfo
+{
+    union
+    {
+        struct
+        {
+            /// This queue semaphore may be opened for use by a different device.
+            /// For DX12 native fence, the flag needs to be consistent with D3DDDI_SYNCHRONIZATIONOBJECT_FLAGS.Shared
+            /// given by DX runtime.
+            uint32 shareable              :  1;
+            /// This queue semaphore can only be shared through an NT handle.
+            /// For DX12 native fence, the flag needs to be consistent with
+            /// D3DDDI_SYNCHRONIZATIONOBJECT_FLAGS.NtSecuritySharing given by DX runtime.
+            uint32 sharedViaNtHandle      :  1;
+            uint32 externalOpened         :  1;  ///< Semaphore was created by other APIs.
+            /// This queue semaphore is a timeline semaphore. Timeline semaphores have a 64-bit unsigned integer payload
+            /// which gets monotonically increased with each Signal operation. A wait on a timeline semaphore blocks the
+            /// waiter until the specified payload value has been signaled.
+            /// For DX12 native fence, runtime determines initialCount. Therefore, timeline flag has to be set.
+            uint32 timeline               :  1;
+            /// Do not signal the queue semaphore to max if the device is lost.
+            /// For DX12 native fence, the flag needs to be consistent with
+            /// D3DDDI_SYNCHRONIZATIONOBJECT_FLAGS.NoSignalMaxValueOnTdr given by DX runtime.
+            uint32 noSignalOnDeviceLost   :  1;
+            /// For native fence only. If it's 0x0, the native fence type is D3DDDI_NATIVEFENCE_TYPE_DEFAULT.
+            /// If it's 0x1, native fence type is D3DDDI_NATIVEFENCE_TYPE_INTRA_GPU.
+            /// For DX12, the value is determined by runtime. DXCP needs to set it by reading D3DDDI_NATIVEFENCEINFO.
+            uint32 gpuOnly                :  1;
+            /// This queue semaphore will be a monitored fence if this flag is set, even if the OS supports native fence.
+            uint32 forceUseMonitoredFence :  1;
+            uint32 reserved               : 25;  ///< Reserved for future use.
+        };
+        uint32 u32All;              ///< Flags packed as 32-bit uint.
+    } flags;                        ///< Queue semaphore creation flags.
+
+    uint32 maxCount;                ///< The maximum signal count; once reached, further signals are dropped.  Must be
+                                    ///  non-zero and no more than maxSemaphoreCount in @ref DeviceProperties.  For
+                                    ///  example, a value of one would request a binary semaphore.
+                                    ///  NOTE: maxCount does not apply to timeline semaphores.
+
+    uint64 initialCount;            ///< Initial value for timeline semaphores. (or)
+                                    ///  Initial count value for counting semaphores.
+                                    ///  Must not be larger than maxCount for counting semaphores.
+                                    ///  For DX12 native fence, DXCP needs to pass InitialFenceValue from
+                                    ///  D3DDDI_NATIVEFENCEINFO.
+
+};
+
+/// Specifies parameters for opening a queue semaphore for use on another device.  Input structure to
+/// IDevice::OpenSharedQueueSemaphore().
+struct QueueSemaphoreOpenInfo
+{
+    /// Shared queue semaphore object, belonging to another device, that is to be opened.
+    IQueueSemaphore* pSharedQueueSemaphore;
+};
+
+/// Specifies parameters for opening a queue semaphore created by other APIs such as D3D.
+struct ExternalQueueSemaphoreOpenInfo
+{
+    union
+    {
+        struct
+        {
+            uint32 crossProcess       :  1;   ///< This semaphore was created in another process.
+            uint32 sharedViaNtHandle  :  1;   ///< The shared semaphore handle is an NT handle.
+            uint32 isReference        :  1;   ///< If set, then the opened semaphore will reference the same sync
+                                              ///< object in the kernel.  Otherwise, the object is copied to the
+                                              ///< new Semaphore.
+            /// This queue semaphore is a timeline semaphore. Timeline semaphores have a 64-bit unsigned integer payload
+            /// which gets monotonically increased with each Signal operation. A wait on a timeline semaphore blocks the
+            /// waiter until the specified payload value has been signaled.
+            uint32 timeline          :  1;
+            uint32 reserved          : 28;  ///< Reserved for future use.
+        };
+        uint32 u32All;                  ///< Flags packed as 32-bit uint.
+    } flags;                            ///< External queue semaphore open flags.
+
+    OsExternalHandle externalSemaphore; ///< External shared semaphore handle.
+
+#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 882
+#if defined(__unix__) && PAL_KMT_BUILD
+    uint64           syncFdSignalValue; ///< Signal timeline value when importing the state of a sync file.
+#endif
+#endif
+};
+
+/// Specifies parameters for exporting a queue semaphore. Input structure to IQueueSemaphore::ExportExternalHandle().
+struct QueueSemaphoreExportInfo
+{
+    union
+    {
+        struct
+        {
+            uint32 isReference        :  1;   ///< If set, then the exported handle references the same sync object
+                                              ///< in the kernel.  Otherwise, the object is copied to the new
+                                              ///< Semaphore.
+            uint32 reserved           : 31;   ///< Reserved for future use.
+        };
+        uint32 u32All;                        ///< Flags packed as 32-bit uint.
+    } flags;                                  ///< External queue semaphore export flags.
+
+#if PAL_KMT_BUILD
+    const _SECURITY_ATTRIBUTES* pSecurityAttributes; ///< It specifies the security descriptor and the inheritable
+                                                     ///  attribute.
+    const wchar_t*              pNtObjectName;       ///< Name for the NT handle.  If the object is exported as an
+                                                     ///  NT handle with a name, then the handle can be acquired
+                                                     ///  via this name.
+    uint32                      accessFlags;         ///< Desired access rights of GPU memory.
+#if defined(__unix__)
+    uint64                      syncFdWaitValue;     ///< Wait timeline value when exporting the state of a sync file.
+#endif
+#endif
+};
+
+/**
+ ***********************************************************************************************************************
+ * @interface IQueueSemaphore
+ * @brief     Semaphore object used to synchronize GPU work performed by multiple, parallel queues.
+ *
+ * These semaphores are used by calling IQueue::SignalQueueSemaphore() and IQueue::WaitQueueSemaphore().
+ *
+ * @see IDevice::CreateQueueSemaphore()
+ * @see IDevice::OpenSharedQueueSemaphore()
+ ***********************************************************************************************************************
+ */
+class IQueueSemaphore : public IDestroyable
+{
+public:
+    /// An IQueue::WaitQueueSemaphore operation may need to be sent down to the OS after the corresponding
+    /// IQueue::SignalQueueSemaphore operation due to GPU scheduler limitations. This method checks if any queues have
+    /// batched-up commands waiting for a SignalQueueSemaphore operation to appear.
+    ///
+    /// @returns True if one or more queues have some number of commands batched-up waiting for other queues to signal
+    ///          this semaphore. False otherwise.
+    virtual bool HasStalledQueues() = 0;
+
+    /// Queries the payload of a timeline semaphore.
+    ///
+    /// @param [out] pValue           Receives the queried payload.
+    ///
+    /// @returns Success if the timeline semaphore is queried successfully.  Otherwise, one of the following errors may
+    ///          be returned:
+    ///          + ErrorInvalidValue if an unexpected conversion error occurs.
+    ///          + ErrorInvalidObjectType if the semaphore is not a timeline semaphore.
+    virtual Result QuerySemaphoreValue(
+        uint64*                  pValue) = 0;
+
+    /// Waits on a timeline semaphore point.  Note that this is a CPU wait.
+    ///
+    /// @param    [in]  value            The timeline point to wait on.
+    /// @param    [in]  timeout          The maximum time to wait, in nanoseconds.
+    ///
+    /// @returns Success if the timeline semaphore point is waited on successfully.  Otherwise, one of the following
+    ///          errors may be returned:
+    ///          + ErrorInvalidValue if an unexpected conversion error occurs.
+    ///          + ErrorInvalidObjectType if the semaphore is not a timeline semaphore.
+    virtual Result WaitSemaphoreValue(
+        uint64                   value,
+        std::chrono::nanoseconds timeout) = 0;
+
+    /// Signals a timeline semaphore point.  Note that this is a CPU signal.
+    ///
+    /// @param    [in]  value            The timeline point to signal.
+    ///
+    /// @returns Success if the timeline semaphore point is signaled successfully.  Otherwise, one of the following
+    ///          errors may be returned:
+    ///          + ErrorInvalidValue if an unexpected conversion error occurs.
+    ///          + ErrorInvalidObjectType if the semaphore is not a timeline semaphore.
+    virtual Result SignalSemaphoreValue(
+        uint64                   value) = 0;
+
+#if PAL_KMT_BUILD || PAL_AMDGPU_BUILD
+    /// Returns an OS-specific handle which can be used to refer to this semaphore object across processes. This will
+    /// return a null or invalid handle if the object was not created with the external create flag set.
+    ///
+    /// @param  [in] exportInfo    Information describing how the semaphore handle should be exported.
+    /// @note This function is only declared when PAL_KMT_BUILD or PAL_AMDGPU_BUILD is enabled.
+    ///
+    /// @returns An OS-specific handle which can be used to access the semaphore object across processes.
+    virtual OsExternalHandle ExportExternalHandle(
+        const QueueSemaphoreExportInfo& exportInfo) const = 0;
+#endif
+
+#if defined(_WIN32)
+    /// Returns an OS-specific handle which can be used by another device to access the semaphore object.
+    ///
+    /// @returns An OS-specific handle which can be used by another device to access the semaphore object.
+    virtual OsExternalHandle ExportKmtHandle() const = 0;
+#endif
+
+    /// Returns the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @returns Pointer to client data; nullptr if SetClientData() has not been called on this object.
+    void* GetClientData() const
+    {
+        return m_pClientData;
+    }
+
+    /// Sets the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @param  [in]    pClientData     A pointer to arbitrary client data; stored as-is for GetClientData().
+    void SetClientData(
+        void* pClientData)
+    {
+        m_pClientData = pClientData;
+    }
+
+protected:
+    /// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
+    /// calling the proper create method.
+    IQueueSemaphore() : m_pClientData(nullptr) {}
+
+    /// @internal Destructor.  Prevent use of delete operator on this interface.  Client must destroy objects by
+    /// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
+    /// object on their own.
+    virtual ~IQueueSemaphore() { }
+
+private:
+    /// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
+    /// and set via SetClientData().
+    /// For non-top-layer objects, this will point to the layer above the current object.
+    void* m_pClientData;
+};
+
+} // Pal
@@ -0,0 +1,251 @@
+/*
+ ***********************************************************************************************************************
+ *
+ *  Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a copy
+ *  of this software and associated documentation files (the "Software"), to deal
+ *  in the Software without restriction, including without limitation the rights
+ *  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ *  copies of the Software, and to permit persons to whom the Software is
+ *  furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in all
+ *  copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ *  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ *  SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+ ***********************************************************************************************************************
+ * @file  palShaderLibrary.h
+ * @brief Defines the Platform Abstraction Library (PAL) IShaderLibrary interface and related types.
+ ***********************************************************************************************************************
+ */
+
+#pragma once
+
+#include "pal.h"
+#include "palDestroyable.h"
+#include "palStringView.h"
+#include "palSpan.h"
+
+namespace Pal
+{
+
+struct GpuMemSubAllocInfo;
+
+/// Common flags controlling creation of shader libraries.
+union LibraryCreateFlags
+{
+    struct
+    {
+        uint32 clientInternal  : 1;  ///< Internal library not created by the application.
+        uint32 isGraphics      : 1;  ///< Whether it is a graphics library.
+        uint32 reserved        : 30; ///< Reserved for future use.
+    };
+    uint32 u32All;                  ///< Flags packed as 32-bit uint.
+};
+
+/// Specifies properties about an indirect function belonging to a @ref IShaderLibrary object.  Part of the input
+/// structure to IDevice::CreateShaderLibrary().
+struct ShaderLibraryFunctionInfo
+{
+    Util::StringView<char> symbolName;  ///< ELF symbol name of the associated function.
+    gpusize                gpuVirtAddr; ///< [out] GPU virtual address of the function.  This is computed by PAL during
+                                        ///  library creation.
+};
+
+/// Specifies a shader sub type / ShaderKind.
+enum class ShaderSubType : uint32
+{
+    Unknown = 0,
+    Traversal,
+    RayGeneration,
+    Intersection,
+    AnyHit,
+    ClosestHit,
+    Miss,
+    Callable,
+    LaunchKernel,           ///< Raytracing launch kernel
+    Count                   ///< Number of shader sub types listed above (including Unknown).
+};
+
+/// Specifies properties for creation of a compute @ref IShaderLibrary object.  Input structure to
+/// IDevice::CreateShaderLibrary().
+struct ShaderLibraryCreateInfo
+{
+    LibraryCreateFlags  flags;      ///< Library creation flags.
+
+    const void*  pCodeObject;       ///< Pointer to code-object ELF binary implementing the Pipeline ABI interface.
+                                    ///  The code-object ELF contains pre-compiled shaders, register values, and
+                                    ///  additional metadata.
+    size_t       codeObjectSize;    ///< Size of the code object, in bytes.
+};
+
+/// Reports properties of a compiled library.  Returned by IShaderLibrary::GetInfo().
+struct LibraryInfo
+{
+    PipelineHash internalLibraryHash;  ///< 128-bit identifier extracted from this library's ELF binary, composed of
+                                       ///  the state the compiler decided was appropriate to identify the compiled
+                                       ///  library.  The lower 64 bits are "stable"; the upper 64 bits are "unique".
+};
+
+/// Reports shader stats. Multiple bits set in the shader stage mask indicate that multiple shaders have been combined
+/// due to HW support. The same information will be repeated for each of the constituent shaders in this case.
+struct ShaderLibStats
+{
+    ShaderHash         shaderHash;             ///< Shader hash.
+    CommonShaderStats  common;                 ///< The shader compilation parameters for this shader.
+    /// Maximum number of VGPRs the compiler was allowed to use for this shader.  This limit will be the minimum
+    /// of any architectural restriction and any client-requested limit intended to increase the number of waves in
+    /// flight.
+    uint32             numAvailableVgprs;
+    /// Maximum number of SGPRs the compiler was allowed to use for this shader.  This limit will be the minimum
+    /// of any architectural restriction and any client-requested limit intended to increase the number of waves in
+    /// flight.
+    uint32             numAvailableSgprs;
+    size_t             isaSizeInBytes;          ///< Size of the shader ISA disassembly for this shader.
+    PipelineHash       palInternalLibraryHash;  ///< Internal hash of the shader compilation data used by PAL.
+    uint32             stackFrameSizeInBytes;   ///< Shader function stack frame size, in bytes.
+    ShaderSubType      shaderSubType;           ///< ShaderSubType / Shader Kind.
+    CompilerStackSizes cpsStackSizes;           ///< Stack used in Continuation.
+};
+
+/**
+ ***********************************************************************************************************************
+ * @interface IShaderLibrary
+ * @brief     Object containing one or more shader functions stored in GPU memory.  These shader functions are callable
+ *            from the shaders contained within IPipeline objects.
+ *
+ * Before a pipeline which calls into this library is bound to a command buffer (using @ref ICmdBuffer::BindPipeline),
+ * the client must call @ref IPipeline::LinkWithLibraries() and specify this library in the list of linked libraries.
+ * Failure to comply with this requirement is an error and will result in undefined behavior.
+ *
+ * @see IDevice::CreateShaderLibrary()
+ * @see IPipeline::LinkWithLibraries()
+ ***********************************************************************************************************************
+ */
+class IShaderLibrary : public IDestroyable
+{
+public:
+    /// Returns properties of this library and its corresponding shader functions.
+    ///
+    /// @returns Reference to the property structure describing this library.
+    virtual const LibraryInfo& GetInfo() const = 0;
+
+    /// Returns a list of GPU memory allocations used by this library.
+    ///
+    /// @param [in,out] pNumEntries    Input value specifies the available size in pAllocInfoList; output value
+    ///                                reports the number of GPU memory allocations.
+    /// @param [out]    pAllocInfoList If pAllocInfoList=nullptr, then pNumEntries is ignored on input.  On output it
+    ///                                will reflect the number of allocations that make up this library.  If
+    ///                                pAllocInfoList!=nullptr, then on input pNumEntries is assumed to be the number
+    ///                                of entries in the pAllocInfoList array.  On output, pNumEntries reflects the
+    ///                                number of entries in pAllocInfoList that are valid.
+    /// @returns Success if the allocation info was successfully written to the buffer.  Otherwise, one of:
+    ///          + ErrorInvalidValue if the caller provides a buffer size that is different from the size needed.
+    ///          + ErrorInvalidPointer if pNumEntries is nullptr.
+    virtual Result QueryAllocationInfo(
+        size_t*                    pNumEntries,
+        GpuMemSubAllocInfo* const  pAllocInfoList) const = 0;
+
+    /// Gives the client access to the resource ID used for internal PAL events.
+    /// E.g.: Resource Create, Resource Bind, Resource Destroy.
+    ///
+    /// @returns The Resource ID.
+    virtual const void* GetResourceId() const = 0;
+
+    /// Obtains the binary code object for this library.
+    ///
+    /// @param [in, out] pSize  Represents the size of the library ELF (see pBuffer).
+    ///
+    /// @param [out] pBuffer    If non-null, the library ELF is written in the buffer. If null, the size required
+    ///                         for the library ELF is given out in the location pSize.
+    ///
+    /// @returns Success if the library binary was fetched successfully.
+    ///          + ErrorUnavailable if the library binary was not fetched successfully.
+    virtual Result GetCodeObject(
+        uint32*  pSize,
+        void*    pBuffer) const = 0;
+
+    /// Returns the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @returns Pointer to client data.
+    void* GetClientData() const { return m_pClientData; }
+
+    /// Sets the value of the associated arbitrary client data pointer.
+    /// Can be used to associate arbitrary data with a particular PAL object.
+    ///
+    /// @param  [in]    pClientData     A pointer to arbitrary client data.
+    void SetClientData(
+        void* pClientData)
+    {
+        m_pClientData = pClientData;
+    }
+
+    /// Obtains the compiled shader ISA code for the shader function specified.
+    ///
+    /// @param [in]  pShaderExportName The shader exported name
+    ///
+    /// @param [in, out] pSize  Represents the size of the shader ISA code.
+    ///
+    /// @param [out] pBuffer    If non-null, the shader ISA code is written in the buffer. If null, the size required
+    ///                         for the shader ISA is given out in the location pSize.
+    ///
+    /// @returns Success if the shader ISA code was fetched successfully.
+    ///          +ErrorUnavailable if the shader ISA code was not fetched successfully.
+
+    virtual Result GetShaderFunctionCode(
+        Util::StringView<char> shaderExportName,
+        size_t*                pSize,
+        void*                  pBuffer) const = 0;
+
+    /// Obtains the shader pre and post compilation stats/params for the specified shader.
+    ///
+    /// @param [in]  pShaderExportName The shader exported name
+    ///
+    /// @param [out] pShaderStats Pointer to the ShaderStats structure which will be filled with the shader stats for
+    ///                           the shader stage mentioned in shaderType. This cannot be nullptr.
+    /// @param [in]  getDisassemblySize If set to true performs disassembly on the shader binary code and reports the
+    ///                                 size of the disassembly string in ShaderStats::isaSizeInBytes. Else reports 0.
+    /// @returns Success if the stats were successfully obtained for this shader, including the shader disassembly size.
+    ///          +ErrorUnavailable if a wrong shader stage for this pipeline was specified, or if some internal error
+    ///                           occured.
+    virtual Result GetShaderFunctionStats(
+        Util::StringView<char> shaderExportName,
+        ShaderLibStats*        pShaderStats) const = 0;
+
+    /// Returns the function list owned by this shader library
+    ///
+    /// @returns A list of ShaderLibraryFunctionInfo.
+    virtual const Util::Span<const ShaderLibraryFunctionInfo> GetShaderLibFunctionInfos() const = 0;
+
+protected:
+    /// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
+    /// called the proper create method.
+    IShaderLibrary() : m_pClientData(nullptr) { }
+
+    /// @internal Destructor.  Prevent use of delete operator on this interface.  Client must destroy objects by
+    /// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
+    /// object on their own.
+    virtual ~IShaderLibrary() { }
+
+private:
+    /// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
+    /// and set via SetClientData().
+    /// For non-top-layer objects, this will point to the layer above the current object.
+    void*  m_pClientData;
+
+    IShaderLibrary(const IShaderLibrary&) = delete;
+    IShaderLibrary& operator=(const IShaderLibrary&) = delete;
+};
+
+} // Pal