/* *********************************************************************************************************************** * * Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * **********************************************************************************************************************/ /** *********************************************************************************************************************** * @file palDevice.h * @brief Defines the Platform Abstraction Library (PAL) IDevice interface and related types. 
*********************************************************************************************************************** */ #pragma once #include "palCmdAllocator.h" #include "palDestroyable.h" #include "palFence.h" #include "palFile.h" #include "palGpuMemory.h" #include "palImage.h" #include "palInlineFuncs.h" #include "palLib.h" #include "palPerfExperiment.h" #include "palPipeline.h" #include "palQueue.h" #include #if PAL_KMT_BUILD struct _SECURITY_ATTRIBUTES; #endif namespace Util { class Event; } namespace Pal { // Forward declarations. class IBorderColorPalette; class ICmdAllocator; class ICmdBuffer; class IColorBlendState; class IColorTargetView; class IDepthStencilState; class IDepthStencilView; class IDevice; class IFence; class IGpuEvent; class IGpuMemory; class IImage; class IIndirectCmdGenerator; class IMsaaState; class IPerfExperiment; class IPipeline; class IPrivateScreen; class IQueryPool; class IQueue; class IQueueSemaphore; class IShaderLibrary; class ISwapChain; struct BorderColorPaletteCreateInfo; struct CmdAllocatorCreateInfo; struct CmdBufferCreateInfo; struct ColorBlendStateCreateInfo; struct ColorTargetViewCreateInfo; struct ComputePipelineCreateInfo; struct DepthStencilStateCreateInfo; struct DepthStencilViewCreateInfo; struct ExternalImageOpenInfo; struct ExternalGpuMemoryOpenInfo; struct ExternalQueueSemaphoreOpenInfo; struct ExternalResourceOpenInfo; struct GpuEventCreateInfo; struct GpuMemoryCreateInfo; struct GpuMemoryOpenInfo; struct GpuMemoryRef; struct GraphicsPipelineCreateInfo; struct ImageCreateInfo; struct IndirectCmdGeneratorCreateInfo; struct MsaaStateCreateInfo; struct MsaaQuadSamplePattern; struct PeerGpuMemoryOpenInfo; struct PeerImageOpenInfo; struct PerfExperimentCreateInfo; struct PinnedGpuMemoryCreateInfo; struct PresentableImageCreateInfo; struct PrivateScreenCreateInfo; struct PrivateScreenNotifyInfo; struct QueryPoolCreateInfo; struct QueueCreateInfo; struct QueueSemaphoreCreateInfo; struct QueueSemaphoreOpenInfo; 
struct ShaderLibraryCreateInfo;
struct SwapChainCreateInfo;
struct SwapChainProperties;
struct SvmGpuMemoryCreateInfo;
struct GraphicPipelineViewInstancingInfo;
// WsiPlatform is forward declared as a scoped enum for client interface versions >= 881 and as an unscoped enum
// for older versions.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 881
enum class WsiPlatform : uint32;
#else
enum WsiPlatform : uint32;
#endif
enum class PipelineBindPoint : uint32;
enum class VaRange : uint32;
enum class SwapChainMode : uint32;

namespace CmdDisassembly
{
class ICmdBufferReporting;
}

/// Maximum string length for GPU names.  @see DeviceProperties.
constexpr uint32 MaxDeviceName = 256;
/// Maximum number of indirect user-data tables managed by PAL's command buffer objects.  @see DeviceFinalizeInfo.
constexpr uint32 MaxIndirectUserDataTables = 1;
/// Maximum number of supported entries in the MSAA sample pattern palette.  See IDevice::SetSamplePatternPalette().
constexpr uint32 MaxSamplePatternPaletteEntries = 16;
/// Maximum number of supported shader engines in the GPU. This can be much larger than the actual value, but is
/// useful for sizing arrays.
constexpr uint32 MaxShaderEngines = 32;
/// Maximum number of supported subunits each Shader Engine splits into (SH or SA, depending on generation).
constexpr uint32 MaxShaderArraysPerSe = 2;
/// Size of the Active Pixel Packer Mask in DWORDs.
constexpr uint32 ActivePixelPackerMaskDwords = 4;
/// Maximum number of pixel packers per SE expected by PAL.
constexpr uint32 MaxPixelPackerPerSe = 4;

/// Defines host flags for Semaphore/Fence Array wait.
enum HostWaitFlags : uint32
{
    HostWaitAny = 0x1,  ///< If this bit is set, return after any single semaphore/fence in the array has
                        ///  completed.  If it is not set, wait for completion of all semaphores/fences in the
                        ///  array before returning.
};

/// Specifies what type of GPU a particular IDevice is (i.e., discrete vs. integrated).
enum class GpuType : uint32
{
    Unknown = 0x0,     ///< The GPU type can't be determined and is unknown.
    Integrated = 0x1,  ///< Integrated GPU (i.e., APU).
    Discrete = 0x2,    ///< Discrete GPU.
    Virtual = 0x3,     ///< Virtualized GPU.
    Count              ///< Number of GPU types listed above (evaluates to 4); not a GPU type itself.
};

// The OSS, VCE and UVD IP level enums below are only declared for client interface versions older than 888.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 888
/// Specifies which operating-system-support IP level (OSSIP) this device has.
enum class OssIpLevel : uint32
{
    _None = 0x0,  ///< @internal The device does not have an OSSIP block, or its level cannot be determined

    // Unfortunately for Linux clients, X.h includes a "#define None 0" macro.  Clients have their choice of either
    // undefing None before including this header or using _None when dealing with PAL.
#ifndef None
    None = _None,  ///< The device does not have an OSSIP block, or its level cannot be determined
#endif

    OssIp2_4 = 0x3,
    OssIp4 = 0x4,
};

/// Specifies which VCE IP level this device has.
enum class VceIpLevel : uint32
{
    _None = 0x0,  ///< @internal The device does not have a VCEIP block, or its level cannot be determined

    // Unfortunately for Linux clients, X.h includes a "#define None 0" macro.  Clients have their choice of either
    // undefing None before including this header or using _None when dealing with PAL.
#ifndef None
    None = _None,  ///< The device does not have a VCEIP block, or its level cannot be determined
#endif

    VceIp1 = 0x1,
    VceIp2 = 0x2,
    VceIp3 = 0x3,
    VceIp3_1 = 0x4,
    VceIp3_4 = 0x5,
    VceIp4 = 0x6,
};

/// Specifies which UVD IP level this device has.
enum class UvdIpLevel : uint32
{
    _None = 0x0,  ///< @internal The device does not have a UVDIP block, or its level cannot be determined

    // Unfortunately for Linux clients, X.h includes a "#define None 0" macro.  Clients have their choice of either
    // undefing None before including this header or using _None when dealing with PAL.
#ifndef None
    None = _None,  ///< The device does not have a UVDIP block, or its level cannot be determined
#endif

    UvdIp3_2 = 0x1,
    // NOTE(review): UvdIp4 and UvdIp4_2 share the value 0x2, so the two levels compare equal and cannot be told
    // apart by value.  Confirm that this aliasing is intentional before relying on either enumerator.
    UvdIp4 = 0x2,
    UvdIp4_2 = 0x2,
    UvdIp5 = 0x3,
    UvdIp6 = 0x4,
    UvdIp6_2 = 0x5,
    UvdIp6_3 = 0x6,
    UvdIp7 = 0x7,
    UvdIp7_2 = 0x8,
};
#endif

/// Specifies which VCN IP level this device has.
enum class VcnIpLevel : uint32
{
    _None = 0x0,  ///< @internal The device does not have a VCNIP block, or its level cannot be determined

    // Unfortunately for Linux clients, X.h includes a "#define None 0" macro.  Clients have their choice of either
    // undefing None before including this header or using _None when dealing with PAL.
#ifndef None
    None = _None,  ///< The device does not have a VCNIP block, or its level cannot be determined
#endif

    // For client interface versions >= 888 this enum has no enumerators beyond the "none" values above; VcnIp1 is
    // only declared for older interface versions.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 888
#else // PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 888
    VcnIp1 = 0x1,
#endif
};

#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 888
/// Specifies which SPU IP level this device has.  Only declared for client interface versions older than 888.
enum class SpuIpLevel : uint32
{
    _None = 0x0,  ///< @internal The device does not have an SPUIP block, or its level cannot be determined
#ifndef None
    None = _None,  ///< The device does not have an SPUIP block, or its level cannot be determined
#endif

    SpuIp = 0x1,
};
#endif

/// Specifies which PSP IP level this device has.
enum class PspIpLevel : uint32
{
    _None = 0x0,  ///< @internal The device does not have a PSPIP block, or its level cannot be determined
#ifndef None
    None = _None,  ///< The device does not have a PSPIP block, or its level cannot be determined
#endif

    PspIp10 = 0x1,
};

/// Specifies the video decode type.
enum class VideoDecodeType : uint32
{
    H264 = 0x0,       ///< H264 VLD
    Vc1 = 0x1,        ///< VC1 VLD
    Mpeg2Idct = 0x2,  ///< Partial MPEG2 decode (IT+MP)
    Mpeg2Vld = 0x3,   ///< Full MPEG2 decode (RE+IT+MP+DB)
    Mpeg4 = 0x4,      ///< MPEG4
    Wmv9 = 0x5,       ///< WMV9 IDCT
    Mjpeg = 0x6,      ///< Motion JPEG
    Hevc = 0x7,       ///< HEVC
    Vp9 = 0x8,        ///< VP9
    Hevc10Bit = 0x9,  ///< HEVC 10bit
    Vp910Bit = 0xa,   ///< VP9 10bit
    Av1 = 0xb,        ///< AV1 8/10bit
    Av112Bit = 0xc,   ///< AV1 12bit
    Count,            ///< Number of decode types listed above (evaluates to 0xd); not a decode type itself.
};

/// Video CODEC to use for encoding.
enum class VideoEncodeCodec : uint32
{
    H264 = 0x0,  ///< H.264
    H265 = 0x1,  ///< H.265
    Av1 = 0x2,   ///< AV1
    Count        ///< Number of codecs listed above (evaluates to 3); not a codec itself.
};

/// Specifies a virtual address range memory should be allocated in.
enum class VaRange : uint32
{
    Default,                ///< Default VA range.  Choose this for most allocations.
    DescriptorTable,        ///< Place the allocation in a 4GB VA range reserved by PAL for descriptor tables.  Knowing
                            ///  an allocation is allocated in this range, only one user data entry is required to
                            ///  specify a descriptor table.  @see ResourceMappingNodeType.
    ShadowDescriptorTable,  ///< Place the allocation in a 4GB VA range reserved by PAL for "shadow" descriptor tables.
                            ///  A shadow descriptor table is an additional table with the same layout as its parent
                            ///  descriptor table that can hold infrequently needed data like fmask SRDs or UAV counter
                            ///  data.  This scheme allows the client and SC to work out a known location for
                            ///  infrequently needed data without wasting a user data entry or wasting half of every
                            ///  descriptor cache line.
                            ///  Only supported if DeviceProperties::gpuMemoryProperties::flags::shadowDescVaSupport is
                            ///  set.
    Svm,                    ///< Place the allocation in a VA range reserved by PAL for shared virtual memory (SVM).
                            ///  This is a GPU VA range that is also reserved on the CPU side.
                            ///  The size of the reserved VA is set by the PAL client by calling CreatePlatform.
    CaptureReplay,          ///< Place the allocation in a VA range reserved for capture and playback.
    Count,                  ///< Number of VA ranges listed above (evaluates to 5); not a VA range itself.
};

/// Enumerates the TMZ (trusted memory zone) support level.
enum class TmzSupportLevel : uint32
{
    None = 0,           ///< TMZ not supported.
    PerQueue = 1,       ///< Enable TMZ mode per queue.
    PerSubmission = 2,  ///< Enable TMZ mode per submission.
    PerCommandOp = 3    ///< Enable TMZ mode per command operation.
};

/// How to interpret a single bit in a swizzle equation.  The three bitfields add up to 8 bits, the same width as the
/// u8All member they are overlaid with.
union SwizzleEquationBit
{
    struct
    {
        uint8 valid : 1;    ///< Indicates whether this channel setting is valid.
        uint8 channel : 2;  ///< 0 for x channel, 1 for y channel, 2 for z channel.
        uint8 index : 5;    ///< The channel index.
    };
    uint8 u8All;  ///< The above values packed in an 8-bit uint.
};

constexpr uint32 SwizzleEquationMaxBits = 20;  ///< Swizzle equations will consider no more than this many bits.
constexpr uint8 InvalidSwizzleEqIndex = 0xFF;  ///< Indicates an invalid swizzle equation index in the equation table.
constexpr uint8 LinearSwizzleEqIndex = 0xFE;   ///< An invalid eq. index indicating a row-major, linear memory layout.

/// Texture fetch meta-data capabilities bitfield definition, used with the tcCompatibleMetaData setting.
/// The per-bit descriptions below are taken from the documentation of PalPublicSettings::tcCompatibleMetaData.
enum TexFetchMetaDataCaps : uint32
{
    TexFetchMetaDataCapsNoAaColor = 0x00000001,     ///< Single-sample color surface.
    TexFetchMetaDataCapsMsaaColor = 0x00000002,     ///< MSAA color surface.
    TexFetchMetaDataCapsFmask = 0x00000004,         ///< FMask data.
    TexFetchMetaDataCapsNoAaDepth = 0x00000008,     ///< Single-sample depth surface.
    TexFetchMetaDataCapsMsaaDepth = 0x00000010,     ///< MSAA depth surface.
    TexFetchMetaDataCapsAllowStencil = 0x00000020,  ///< Allow stencil.
    TexFetchMetaDataCapsAllowZ16 = 0x00000040,      ///< Allow Z-16 surfaces.
};

#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 936
/// Catalyst AI setting enums.  Only declared for client interface versions older than 936.
/// NOTE(review): presumably the values accepted by PalPublicSettings::catalystAI - confirm.
enum CatalystAiSettings : uint32
{
    CatalystAiDisable = 0,
    CatalystAiEnable = 1,
    CatalystAiMaximum = 2,
};
#endif

/// Texture Filter optimization enum values.
/// NOTE(review): presumably the values accepted by PalPublicSettings::textureOptLevel - confirm.
enum TextureFilterOptimizationSettings : uint32
{
    TextureFilterOptimizationsDisabled = 0,
    TextureFilterOptimizationsEnabled = 1,
    TextureFilterOptimizationsAggressive = 2,
};

/// Distribution Tess Mode enum values.  The per-value descriptions below are taken from the documentation of
/// PalPublicSettings::distributionTessMode.
enum DistributionTessMode : uint32
{
    DistributionTessOff = 0,            ///< No distribution across VGTs (legacy mode).
    DistributionTessDefault = 1,        ///< Optimal settings are chosen depending on the gfxip.
    DistributionTessPatch = 2,          ///< Individual patches are distributed to different VGTs.
    DistributionTessDonut = 3,          ///< Patches are split into donuts and distributed to different VGTs.
    DistributionTessTrapezoid = 4,      ///< Patches from donuts are split into trapezoids and distributed to
                                        ///  different VGTs.  Falls back to donut mode if HW does not support this
                                        ///  mode.
    DistributionTessTrapezoidOnly = 5,  ///< Trapezoid only: distribution is turned off if HW does not support this
                                        ///  mode.
};

/// Defines the context roll optimization flags.  The descriptions below are taken from the documentation of
/// PalPublicSettings::contextRollOptimizationFlags.
enum ContextRollOptimizationFlags : uint32
{
    OptFlagNone = 0x00000000,         ///< Optimization disabled.
    PadParamCacheSpace = 0x00000001,  ///< Pad parameter cache space.
};

/// Defines the initial value to use for DCC metadata.  The "Force" values are the corresponding plain value combined
/// with ForceBit, which matches the 0x11 / 0x12 values documented on PalPublicSettings::dccInitialClearKind.
enum class DccInitialClearKind
{
    Uncompressed = 0x0,
    OpaqueBlack = 0x1,
    OpaqueWhite = 0x2,
    ForceBit = 0x10,
    ForceOpaqueBlack = (ForceBit | OpaqueBlack),
    ForceOpaqueWhite = (ForceBit | OpaqueWhite),
};

/// Enum defining the different scopes (i.e. registry locations) where settings values are stored.
/// Note that several enumerators change numeric value at client interface version 888.
enum InternalSettingScope : uint32
{
    PrivateDriverKey = 0x0,
    PublicPalKey = 0x1,
    PrivatePalKey = 0x2,
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 888
    PublicCatalystKey = 0x3,
    PrivatePalGfx9Key = 0x4,
    PrivatePalGfx12Key = 0x5,
#else // PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 888
    PrivatePalGfx6Key = 0x3,
    PrivatePalGfx9Key = 0x4,
    PublicCatalystKey = 0x5,
    PrivatePalGfx12Key = 0x7,
#endif
    PublicPalFile = 0x9,
    RootDriverKey = 0xa,
};

/// Enum defining override states for feature settings.
enum class FeatureOverride : uint32
{
    Default = 0,   ///< Default setting state.
    Enabled = 1,   ///< (Force) enabled state.  Default may change itself to this state.
    Disabled = 2   ///< (Force) disabled state.  Default may change itself to this state.
};

/// Enum bitmask defining externally-controlled (e.g. by Radeon Settings/KMD) driver feature settings.
/// Bit 3 is not assigned to any feature in this enum.
enum RsFeatureType : uint32
{
    RsFeatureTypeTurboSync = (1u << 0),
    RsFeatureTypeChill = (1u << 1),
    RsFeatureTypeDelag = (1u << 2),
    RsFeatureTypeBoost = (1u << 4),
    RsFeatureTypeProVsr = (1u << 5),
};

/// Output structure containing information about the requested RsFeatureType (singular).  This is a union: only the
/// member matching the requested feature type is meaningful.
union RsFeatureInfo
{
    /// Global TurboSync settings.
    struct
    {
        bool enabled;  ///< Specifies whether TurboSync is enabled globally.
    } turboSync;

    /// Global Chill settings.
    struct
    {
        bool enabled;   ///< Specifies whether Chill is enabled globally.
        uint32 hotkey;  ///< If nonzero, specifies the virtual key code assigned to Chill.
        uint32 minFps;  ///< Specifies the global Chill minimum FPS limit.
        uint32 maxFps;  ///< Specifies the global Chill maximum FPS limit.
    } chill;

    /// Global Delag settings.
    struct
    {
        bool enabled;      ///< Specifies whether Delag is enabled globally.
        uint32 hotkey;     ///< If nonzero, specifies the virtual key code assigned to Delag.
        uint32 hotkeyInd;  ///< If nonzero, specifies the virtual key code assigned to Delag's indicator.
        uint32 limitFps;   ///< Specifies the global Delag FPS limit.
        uint32 level;      ///< Specifies the global Delag level.
    } delag;

    /// Global Boost settings.
    struct
    {
        bool enabled;             ///< Specifies whether Boost is enabled globally.
        uint32 hotkey;            ///< If nonzero, specifies the virtual key code assigned to Boost.
        uint32 hotkeyInd;         ///< If nonzero, specifies the virtual key code assigned to Boost's indicator.
        uint32 minRes;            ///< Specifies the global Boost minimum resolution.
        bool adaptiveVrsEnabled;  ///< Specifies whether BoostAdaptiveVrs is enabled globally.
    } boost;

    /// Global ProVsr settings.
    struct
    {
        bool enabled;   ///< Specifies whether ProVsr is enabled globally.
        uint32 hotkey;  ///< If nonzero, specifies the virtual key code assigned to ProVsr.
    } proVsr;
};

/// High-dynamic range (HDR) surface display modes.  Used to indicate the HDR display standard for a particular swap
/// chain texture format and screen colorspace/transfer function combination.
enum class HdrDisplayMode : uint32
{
    Sdr = 0,        ///< Standard dynamic range; non-HDR compatible (default).
    Hdr10 = 1,      ///< HDR10 PQ.  Requires 10:10:10:2 swap chain.
    ScRgb = 2,      ///< scRGB HDR (Microsoft and FreeSync2 linear mode).  1.0 = 80 nits, 125.0 = 10000 nits.
                    ///  Requires FP16 swapchain.
    FreeSync2 = 3,  ///< FreeSync2 HDR10 Gamma 2.2.  Requires 10:10:10:2 swap chain.
};

/// Size, in chars, of the fixed-length string members of PalPublicSettings (renderedByString and
/// miscellaneousDebugString).
static constexpr uint32 MaxMiscStrLen = 61;

/// Whether to use graphics or compute for performing fast clears on depth stencil views.
enum class FastDepthStencilClearMode : uint8
{
    Default,   ///< Compute or graphics will be chosen at the driver's discretion
    Graphics,  ///< Graphics will always be used
    Compute    ///< Compute will always be used
};

/// Deferred batch binning modes, used by PalPublicSettings::binningMode.  The set of enumerators and their numeric
/// values depend on the client interface version: from version 875 onwards there is no "disabled" enumerator and the
/// remaining values are renumbered to start at 0.
enum DeferredBatchBinMode : uint32
{
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 875
    DeferredBatchBinCustom = 0,
    DeferredBatchBinAccurate = 1
#else
    DeferredBatchBinDisabled = 0,
    DeferredBatchBinCustom = 1,
    DeferredBatchBinAccurate = 2
#endif
};

/// PWS enable mode: e.g. disabled, fully enabled or partially enabled.
enum class PwsMode : uint32
{
    Disabled = 0,            ///< PWS feature is disabled
    Enabled = 1,             ///< PWS feature is fully enabled if HW supports.
    NoLateAcquirePoint = 2   ///< PWS feature is enabled with PWS counter only if HW supports, no late acquire points.
};

#if defined(__unix__)
/// Defines the supported VM always valid modes.  Only declared when building for __unix__.
enum VmAlwaysValidEnable : uint32
{
    VmAlwaysValidDefaultEnable = 0,  ///< Only enable the optimization when kernel driver can support
    VmAlwaysValidForceDisable = 1,   ///< Force the optimization always disabled
    VmAlwaysValidForceEnable = 2     ///< Force the optimization always enabled
};
#endif

/// Alignment mode for accessing graphics/compute buffers.
/// This doesn't change the alignment of the whole buffer, just the alignment at which accesses must be made.
enum class BufferAlignmentMode : uint8
{
    Default,   ///< KMD (and therefore PAL) picks alignment requirement.  Client should be prepared for anything.
    Dword,     ///< Hardware will automatically align requests to the smaller of: element-size or DWORD.
    Unaligned  ///< Any request alignment is allowed.
};

/// Temporal hint behaviors selectable through PalPublicSettings::temporalHintsMrtBehavior.
enum TemporalHintsMrtBehavior : uint8
{
    TemporalHintsDynamicRt = 0x0,  ///< Enable Dynamic RT Temporal hints.  PAL chooses NT vs RT based on heuristics.
    TemporalHintsStaticRt = 0x1,   ///< Regular temporal for both near and far read/write caches.
    TemporalHintsStaticNt = 0x2,   ///< Non-temporal (re-use not expected) for both near and far read/write caches.
};

/// Client-controllable behavior for Gfx12-specific software workaround to HiSZ hardware bug.
/// Allows the client to override PAL defaults for performance or profiled reasons.
enum class HiSZWorkaroundBehavior : uint8
{
    Default = 0x0,                  ///< Let PAL decide what the default is.
    ForceDisableAllWar,             ///< Force disable all workarounds.
                                    ///  Note: This should rarely be chosen and carries an extremely high risk of
                                    ///        issue.  Should only be used when application has been profiled to
                                    ///        guarantee no risk of issue.
    ForceHiSZDisableBasedWar,       ///< Force the disable HiZ/S based workaround behavior.
    ForceHiSZEventBasedWar,         ///< Force the event-after-draw workaround behavior.
                                    ///  Note: This carries a risk that the hang may still be seen.
    ForceHiSZDisableBaseWarWithReZ  ///< Force the disable HiZ/S based workaround behavior, but with an added
                                    ///  optimization to force ZOrder mode to EarlyZThenReZ to reclaim some
                                    ///  performance.  This will apply to all graphics pipelines that trigger the
                                    ///  workaround condition, except those pipelines that have set
                                    ///  GraphicsPipelineCreateInfo::noForceReZ.  This ZOrder change will occur only
                                    ///  when HiZ/S is forcibly disabled by the workaround; otherwise the ZOrder
                                    ///  remains what was chosen by the compiler.
};

/// Pal settings that are client visible and editable.
///
/// Several members only exist for particular client interface versions or build configurations (see the
/// preprocessor conditionals below), so the layout of this structure depends on those build settings.
struct PalPublicSettings
{
    /// Maximum border color palette size supported by any queue.
    uint32 borderColorPaletteSizeLimit;
    /// Whether to use graphics or compute for performing fast clears on depth stencil views.
    FastDepthStencilClearMode fastDepthStencilClearMode;
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 936
    /// Forces all serialized loads (LoadPipeline or LoadCompoundState) to fail.
    bool forceLoadObjectFailure;
#endif
    /// Controls the distribution mode for tessellation, which affects how patches are processed by different VGT
    /// units.  The values correspond to the DistributionTessMode enumerators:
    /// 0: None - No distribution across VGTs (legacy mode).
    /// 1: Default - Optimal settings are chosen depending on the gfxip.
    /// 2: Patch - Individual patches are distributed to different VGTs.
    /// 3: Donut - Patches are split into donuts and distributed to different VGTs.
    /// 4: Trapezoid - Patches from donuts are split into trapezoids and distributed to different VGTs.  Falls back
    ///    to donut mode if HW does not support this mode.
    /// 5: Trapezoid only - Distribution turned off if HW does not support this mode.
    uint32 distributionTessMode;
    /// Flags that control PAL optimizations to reduce context rolls.
    /// 0: Optimization disabled.
    /// 1: Pad parameter cache space.  Sets VS export count and PS interpolant number to per-command buffer maximum
    ///    value.  Reduces context rolls at the expense of parameter cache space.
    uint32 contextRollOptimizationFlags;
    /// The number of unbound descriptor debug srds to allocate.  To detect reads of unbound descriptor within arrays,
    /// multiple debug srds can be allocated.
    uint32 unboundDescriptorDebugSrdCount;
    /// Disables compilation of internal PAL shaders.  It can be enabled only if a PAL client won't use any of PAL blit
    /// functionalities on gfx/compute engines.
    bool disableResourceProcessingManager;
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 936
    /// Controls app detect and image quality altering optimizations exposed by CCC.
    uint32 catalystAI;
#endif
    /// Controls texture filtering optimizations exposed by CCC.
    uint32 textureOptLevel;
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 936
    /// Disables SC initialization.  It can be enabled only if a PAL client won't use SC for shader compilation and
    /// provide direct ISA binaries (usually AQL path).
    bool disableScManager;
#endif
    /// Information about the client performing the rendering.  For example: Rendered By PAL (0.0.1)
    char renderedByString[MaxMiscStrLen];
    /// Debug information that the client or tester might want reported.
    char miscellaneousDebugString[MaxMiscStrLen];
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 936
    /// Allows SC to make optimizations at the expense of IEEE compliance.
    bool allowNonIeeeOperations;
    /// Controls whether shaders should execute one atomic instruction per wave for UAV append/consume operations.
    /// If false, one atomic will be executed per thread.
    bool appendBufPerWaveAtomic;
#endif
    /// Bitmask of cases where texture compatible meta data will be used (see TexFetchMetaDataCaps):
    /// Single-sample color surface: 0x00000001
    /// MSAA color surface:          0x00000002
    /// FMask data:                  0x00000004
    /// Single-sample depth surface: 0x00000008
    /// MSAA depth surface:          0x00000010
    /// Allow stencil:               0x00000020
    /// Allow Z-16 surfs:            0x00000040
    uint32 tcCompatibleMetaData;
    /// Specifies the threshold below which CmdCopyMemory() is executed via a CpDma BLT, in bytes.  CPDMA copies have
    /// lower overhead than CS/Gfx copies, but less throughput for large copies.
    uint32 cpDmaCmdCopyMemoryMaxBytes;
    /// Forces high performance state for allocated queues.  Note: currently supported in Windows only.
    bool forceHighClocks;
    /// When submitting multiple command buffers in a single grQueueSubmit call, the ICD will patch the command streams
    /// so that the command buffers are chained together instead of submitting through KMD multiple times.  This
    /// setting limits the number of command buffers that will be chained together; reduce to prevent problems due to
    /// long running submits.
    uint32 cmdBufBatchedSubmitChainLimit;
    /// Flags that control PAL's command allocator residency optimizations.  If a command allocation isn't optimized
    /// PAL will wait for it to become resident at creation.
    /// 0x1 - Wait for command data to become resident at Submit-time.
    /// 0x2 - Wait for embedded data to become resident at Submit-time.
    /// 0x4 - Wait for marker data to become resident at Submit-time.
    uint32 cmdAllocResidency;
    /// Overrides max queued frames allowed
    uint32 maxQueuedFrames;
    /// Maximum number of presentable images per adapter (including LDA chain) which is recommended.  If the app
    /// exceeds the presentable image number threshold, a warning may be reported.
    uint32 presentableImageNumberThreshold;
    /// Provides a hint to PAL that the client knows that every individual depth stencil surface is always cleared
    /// with the same values.  If TRUE, per-tile tracking of exp/clear will be enabled (requires HTile).
    bool hintInvariantDepthStencilClearValues;
    /// Provides a hint to PAL that PAL should disable color compression on surfaces that are smaller than or equal to
    /// this setting (setting * setting) in size.
    uint32 hintDisableSmallSurfColorCompressionSize;
    /// Disables Escape call to KMD.  This is a temporary setting for experimentation that is expected to break
    /// features that currently need the Escape call.
    bool disableEscapeCall;
    /// A hint to the Windows OS that this application has submissions that are expected to run for a long time.  This
    /// tells the OS that checking elapsed execution time is not a good way to judge if the GPU is hung.
    ///
    /// If this flag is set, Windows shouldn't TDR long submissions on uncontested hardware queues.  Submissions on
    /// contested queues must preempt within the time limit to avoid a TDR.  Setting this may also extend the TDR
    /// timer on compute queues.
    bool longRunningSubmissions;
    /// Disables MCBP on demand.  This is a temporary setting until ATOMIC_MEM packet issue with MCBP is resolved.
    bool disableCommandBufferPreemption;
    /// Disable the fast clear eliminate skipping optimization.  This optimization will conservatively track the usage
    /// of clear values to allow the vast majority of images that never clear to a value that isn't TC-compatible to
    /// skip the CPU and front-end GPU overhead of issuing a predicated fast clear eliminate BLT.
    bool disableSkipFceOptimization;
    /// Sets the minimum BPP of surfaces which will have DCC enabled
    uint32 dccBitsPerPixelThreshold;
    /// See largePageSizeInBytes in DeviceProperties.  This limit defines how large an allocation must be to have
    /// PAL automatically pad allocation starting virtual address alignments to enable this optimization.  By
    /// default, PAL will use the KMD-reported limit.
    gpusize largePageMinSizeForVaAlignmentInBytes;
    /// See largePageSizeInBytes in DeviceProperties.  This limit defines how large an allocation must be to have
    /// PAL automatically pad allocation sizes to fill an integral number of large pages.  By default, PAL will
    /// use the KMD-reported limit.
    gpusize largePageMinSizeForSizeAlignmentInBytes;
    /// Makes the unbound descriptor debug srd 0 so the hardware drops the load and ignores it instead of
    /// pagefaulting.  Used to workaround incorrect app behavior.
    bool zeroUnboundDescDebugSrd;
    /// Preferred heap for uploading client pipelines.  Default is set to @ref GpuHeap::GpuHeapInvisible.  Setting is
    /// ignored for internal pipelines and are uploaded to @ref GpuHeap::GpuHeapLocal.
    GpuHeap pipelinePreferredHeap;
    /// NOTE(review): not documented in this header.  Presumably selects whether depth clamping is decided based on
    /// whether the shader exports Z - confirm against the implementation.
    bool depthClampBasedOnZExport;
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 928
    /// Force the PreColorTarget to an earlier PreRasterization point if used as a wait point.  This is to prevent a
    /// write-after-read hazard for a corner case: shader exports from distinct packers are not ordered.  Advancing
    /// wait point from PreColorTarget to PostPrefetch could cause over-sync due to extra VS/PS_PARTIAL_FLUSH
    /// inserted.  It defaults to false, but client drivers may choose to app-detect to enable it if they see
    /// corruption.
    bool forceWaitPointPreColorToPostPrefetch;
#endif
    /// Allows the client to disable debug overlay visual confirm after DebugOverlay::Platform is created when the
    /// panel setting DebugOverlayEnabled is globally set but a certain application might need to turn off visual
    /// confirm to make the screen not too noisy.
    bool disableDebugOverlayVisualConfirm;
    /// NOTE(review): not documented in this header.  Presumably enables use of the ExecuteIndirect packet - confirm
    /// against the implementation.
    bool enableExecuteIndirectPacket;
    /// Offers flexibility to the client to choose Graphics vs Compute engine for Indirect Command Generation
    /// (Shader path) based on performance and other factors.  The default is false since we have seen perf gains
    /// using the ACE.
    bool disableExecuteIndirectAceOffload;
    /// Value to initialize metadata for DCC surfaces to, if they are compressable.  This has no effect on non-DCC
    /// images.  Images whose initial layout is not compressable are only affected if this is "forced".
    /// The values match the DccInitialClearKind enumerators:
    /// 0x00 - Uncompressed (default)
    /// 0x01 - Opaque Black
    /// 0x02 - Opaque White
    /// 0x11 - Forced Opaque Black
    /// 0x12 - Forced Opaque White
    uint32 dccInitialClearKind;
    /// Allows the client to not create internal VrsImage.  Pal internal will create a 16M image as vrsImageSize.
    bool disableInternalVrsImage;
    /// Allows the client to control binning persistent and context states per bin.
    /// A value of 0 tells PAL to pick the number of states per bin.
    /// (This comment covers both binningPersistentStatesPerBin and binningContextStatesPerBin.)
    uint32 binningPersistentStatesPerBin;
    uint32 binningContextStatesPerBin;
    /// This key controls if binning will be disabled when the PS may kill pixels.
    OverrideMode disableBinningPsKill;
    /// Controls GS LateAlloc val (for pos/prim allocations NOT param cache) on NGG pipelines.  Can be no more than
    /// 127.
    uint32 nggLateAllocGs;
    /// Bitmask of cases where RPM view memory accesses will bypass the MALL
    /// RpmViewsBypassMallOff (0x0): Disable MALL bypass
    /// RpmViewsBypassMallOnRead (0x1): Skip MALL for read access of views created in RPM
    /// RpmViewsBypassMallOnWrite (0x2): Skip MALL for write access of views created in RPM
    /// RpmViewsBypassMallOnCbDbWrite (0x4): Control the RPM CB/DB behavior
    RpmViewsBypassMall rpmViewsBypassMall;
    /// Optimize color export format for depth only rendering.  Only applicable for RB+ parts
    bool optDepthOnlyExportRate;
    /// Controls whether or not we should expand Hi-Z to full range rather than doing fine-grain resummarize
    /// operations.  Expanding Hi-Z leaves the Hi-Z data in a less optimal state but is a much faster operation
    /// than the fine-grain resummarize.
    bool expandHiZRangeForResummarize;
    /// Control whether to have command buffer emit SQTT marker events.  Useful for client driver to perform SQTT
    /// dump without the involvement of dev driver.
    bool enableSqttMarkerEvent;
    /// Controls the value of CB_COLOR0_ATTRIB.LIMIT_COLOR_FETCH_TO_256B_MAX.  This bit limits CB fetch to 256B on
    /// cache miss, regardless of sector size.
    bool limitCbFetch256B;
    /// Controls the deferred batch binning mode; see DeferredBatchBinMode.
    /// The original numeric legend (0: batch binning always disabled, 1: use custom bin sizes, 2: optimal) matches
    /// the enumerator values for client interface versions older than 875 (DeferredBatchBinDisabled,
    /// DeferredBatchBinCustom, DeferredBatchBinAccurate).  From version 875 onwards DeferredBatchBinDisabled no longer
    /// exists, DeferredBatchBinCustom is 0 and DeferredBatchBinAccurate is 1, so prefer the enumerator names over
    /// raw numbers.
    DeferredBatchBinMode binningMode;
    /// Controls the custom batch bin size.  Only used when binningMode is DeferredBatchBinCustom (documented
    /// historically as "deferredBatchBinMode == 1", which is that enumerator's value before interface version 875).
    /// High word is for x, low word is for y.  Default is 128x128.  Values must be power of two between 16 and 512.
    uint32 customBatchBinSize;
    /// Maximum number of primitives per batch.  The maximum value is 1024.
    uint32 binningMaxPrimPerBatch;
    /// Controls PWS enable mode: disabled, fully enabled or partially enabled.  Only takes effect if HW supports PWS.
    PwsMode pwsMode;
    /// Controls the MaxScratchRingSizeBaseline, which is really just the maximum size of the scratch ring
    gpusize maxScratchRingSizeBaseline;
    /// Controls the maximum size of the scratch ring allocation
    /// NOTE(review): the name suggests a percentage used to scale the baseline above - confirm.
    uint32 maxScratchRingSizeScalePct;
#if defined(__unix__)
    /// Whether to enable the vm-always-valid feature on Linux while allocating Bo
    VmAlwaysValidEnable enableVmAlwaysValid;
#endif
    /// Alignment mode for graphics and compute buffers.
    /// Clients must check Pal::DeviceProperties::osProperties::flags::forceAlignmentSupported
    /// to see if anything other than default will work.
    BufferAlignmentMode hardwareBufferAlignmentMode;
    /// Disallows putting the shader ring in system memory for performance purposes.
    /// This is done by un-listing GpuHeapGartUswc as a possible heap for the shader rings,
    /// instead only allowing GpuHeapInvisible and GpuHeapLocal.
    bool forceShaderRingToVMem;
    /// If the client sets this to true they promise they've done exhaustive testing on every ASIC to prove that this
    /// application can use AC01 fast clears safely.  This should never be forced to true unconditionally.
    bool ac01WaNotNeeded;
    /// Toggles whether or not image copies will prefer using the graphics pipeline.  This setting does not force all
    /// copies to use graphics or compute, it changes what method will be selected in cases where either could be
    /// used.
    bool preferGraphicsImageCopy;
    /// Bitmask to control adding Waits around Flush events.
    /// This is a public setting to allow AppDetect to override the 'WaitOnFlush' setting.
    /// Setting 'waitOnFlush' for actual workarounds is strongly discouraged because:
    /// 1) It has a negative performance impact.
    /// 2) Waits affect the timing and pipeline execution which can hide underlying hw/fw/sw bugs.
    /// Issues resolved by added waits should be root caused.
    uint32 waitOnFlush;
    /// Provides the ability for mall to be alloc-ed/noalloc-ed using the dynamic or static behavior.
    TemporalHintsMrtBehavior temporalHintsMrtBehavior;
    /// Allows the client to change the behavior of the Gfx12 HW Bug that impacts HiS and HiZ.
    /// This should never be forced away from default unconditionally.
    HiSZWorkaroundBehavior hiSZWorkaroundBehavior;
    /// For event-based HiSZ workarounds (Gfx12), specifies the timeouts supported by the Scan Converter and Depth
    /// Block for their tile summarizer controller.
    /// Ignored unless @ref hiSZWorkaroundBehavior is set to ForceHiSZEventBasedWar.
    /// If set to 0, PAL picks a default value.
    /// Note: Choosing any value other than 0 carries with it **significant risk** of hangs, as the timeout value
    ///       determined by PAL is the most optimal to avoid the most hangs.  Any deviation from the default must be
    ///       thoroughly tested and is not guaranteed to be safe!
    uint32 tileSummarizerTimeout;
#if PAL_KMT_BUILD
    /// If the client sets it to false, native fence will be disabled OS wide no matter whether the OS or hardware
    /// supports native fence or not.
    bool enableNativeFence;
#endif
    /// Whether HiZ/HiS is valid after depth/stencil image is copied as destination.
    /// - If false, this is not guaranteed, PAL will issue necessary HiZ/HiS resummarization at later client barrier
    ///   call time when transition to another HiZ/HiS valid layout like DepthStencilTarget.
    /// - If true, PAL does necessary HiZ/HiS fixup after copy if needed and HiZ/HiS will be always valid after copy.
    ///   At later client barrier call time, no HiZ/HiS resummarization will happen.
    bool makeLayoutCopyDstHiSZCompatible;
};

/// Defines the modes that the GPU Profiling layer can use when its buffer fills.
enum GpuProfilerStallMode : uint32
{
    GpuProfilerStallAlways = 0,      ///< Always stall to get accurate trace data
    GpuProfilerStallLoseDetail = 1,  ///< Lose register-level detail if under pressure to avoid stalls
    GpuProfilerStallNever = 2,       ///< Never stall, miss trace packets
};

/// Describes the equations needed to interpret the raw memory of a tiled texture.  Each of the three arrays holds up
/// to SwizzleEquationMaxBits entries; numBits gives the number of bits in the equation.
struct SwizzleEquation
{
    SwizzleEquationBit addr[SwizzleEquationMaxBits];  ///< Address setting: each bit is the result of
                                                      ///  addr ^ xor1 ^ xor2.
    SwizzleEquationBit xor1[SwizzleEquationMaxBits];  ///< xor1 setting.
    SwizzleEquationBit xor2[SwizzleEquationMaxBits];  ///< xor2 setting.
    uint32 numBits;                                   ///< The number of bits in the equation.
    bool stackedDepthSlices;                          ///< True if depth slices are treated as being stacked
                                                      ///  vertically prior to swizzling.
};

/// Specifies the hardware features supported for PRT (sparse images).
enum PrtFeatureFlags : uint32
{
    /// Sparse buffers are supported.
    PrtFeatureBuffer             = 0x00000001,

    /// Sparse 2D images are supported.
    PrtFeatureImage2D            = 0x00000002,

    /// Sparse 3D images are supported.
    PrtFeatureImage3D            = 0x00000004,

    /// Sparse multisampled images are supported.
    PrtFeatureImageMultisampled  = 0x00000008,

    /// Sparse depth/stencil images are supported.
    PrtFeatureImageDepthStencil  = 0x00000010,

    /// Residency status in shader instructions is supported.
    PrtFeatureShaderStatus       = 0x00000020,

    /// LOD clamping in shader instructions is supported.
    PrtFeatureShaderLodClamp     = 0x00000040,

    /// Non-miptail levels may have dimensions that aren't integer multiples of the tile size, as long as they are
    /// at least as large as a single tile.
    PrtFeatureUnalignedMipSize   = 0x00000080,

    /// Per-slice miptail (slice-major order) is supported.
    PrtFeaturePerSliceMipTail    = 0x00000100,

    /// Aliasing tiles (without metadata) is supported.
    PrtFeatureTileAliasing       = 0x00000200,

    /// Indicates whether reads of unmapped tiles always return zero.
    PrtFeatureStrictNull         = 0x00000400,

#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 936
    /// Sparse 3D images are supported, restricted to non-standard tile shapes that match the tile mode block depth.
    PrtFeatureNonStandardImage3D = 0x00000800,

    /// This image supports use of residency maps.
    PrtFeaturePrtPlus            = 0x00001000,
#else
    /// This image supports use of residency maps.
    PrtFeaturePrtPlus            = 0x00000800,
#endif
};

/// Describe the settings' scope accessible by clients.
enum class SettingScope { Driver, ///< For settings specific to a UMD Global, ///< For global settings controlled by CCC File, ///< For settings that are only read from a file Root, ///< For settings under HKR }; /// Big Software (BigSW) Release information structure /// Software release management uses this version # to control a rollout of big SW features together. struct BigSoftwareReleaseInfo { uint32 majorVersion; ///< BigSW Release Major version uint32 minorVersion; ///< BigSW Release Minor version. uint32 miscControl; ///< BigSW Release miscellaneous control. }; /// Virtual display capabilities as determined by the OS. The reported values bound the valid ranges of values supported /// by the @ref VirtualDisplayInfo structure passed in to @ref IDevice::CreateVirtualDisplay. struct VirtualDisplayCapabilities { uint32 maxVirtualDisplays; ///< The maximum number of virtual display supported Rational minRefreshRate; ///< The minimum refresh rate Rational maxRefreshRate; ///< The maximum refresh rate }; /// The properties of a specific virtual display struct VirtualDisplayProperties { bool isVirtualDisplay; ///< True, if it's a virtual display }; /// Enumerates all of the types of local video memory which could be associated with a GPU. 
enum class LocalMemoryType : uint32 { Unknown = 0, Ddr2, Ddr3, Ddr4, Gddr5, Gddr6, Hbm, Hbm2, Hbm3, Lpddr4, Lpddr5, Ddr5, Count }; /// Bitmask of all MSAA/EQAA types supported, in terms of samples (S) and shaded fragments (F) enum MsaaFlags : uint16 { MsaaS1F1 = 0x0001, MsaaS2F1 = 0x0002, MsaaS4F1 = 0x0004, MsaaS8F1 = 0x0008, MsaaS16F1 = 0x0010, MsaaAllF1 = 0x001F, MsaaS2F2 = 0x0020, MsaaS4F2 = 0x0040, MsaaS8F2 = 0x0080, MsaaS16F2 = 0x0100, MsaaAllF2 = 0x01E0, MsaaS4F4 = 0x0200, MsaaS8F4 = 0x0400, MsaaS16F4 = 0x0800, MsaaAllF4 = 0x0E00, MsaaS8F8 = 0x1000, MsaaS16F8 = 0x2000, MsaaAllF8 = 0x3000, MsaaAll = 0x3FFF, }; /// Supported RTIP version enumeration enum class RayTracingIpLevel : uint32 { _None = 0x0, ///< The device does not have an RayTracing Ip Level #ifndef None None = _None, ///< The device does not have an RayTracing Ip Level #endif RtIp1_0 = 0x1, ///< First Implementation of HW RT RtIp1_1 = 0x2, ///< Added computation of triangle barycentrics into HW RtIp2_0 = 0x3, ///< Added more Hardware RayTracing features, such as BoxSort, PointerFlag, etc #if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 888 RtIp3_0 = 0x4, ///< Added high precision box node, HW instance node, dual intersect ray, BVH8 intersect ray, /// LDS stack push 8 pop 1, and LDS stack push 8 pop 2 RtIp3_1 = 0x5, ///< Added improved bvh footprints (change to node pointer, 128 Byte primitive structure format, /// 128 Byte Quantized box node, obb support, wide sort) #else // PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 888 RtIp3_0 = 0x4, ///< Added high precision box node, HW instance node, dual intersect ray, BVH8 intersect ray, /// LDS stack push 8 pop 1, and LDS stack push 8 pop 2 RtIp3_1 = 0x6, ///< Added improved bvh footprints (change to node pointer, 128 Byte primitive structure format, /// 128 Byte Quantized box node, obb support, wide sort) #endif }; /// Which ip version a component has reported struct IpTriple { uint32 major : 8; /// Major revision value uint32 minor : 8; /// Minor revision value 
uint32 stepping : 16; /// Stepping value constexpr operator uint32() const { return ((stepping ) | (minor << 16) | (major << 24)); } }; /// Reports various properties of a particular IDevice to the client. @see IDevice::GetProperties. struct DeviceProperties { uint32 vendorId; ///< Vendor ID (should always be 0x1002 for AMD). uint32 deviceId; ///< GPU device ID (e.g., Hawaii XT = 0x67B0). uint32 revisionId; ///< GPU revision. HW-specific value differentiating between different /// SKUs or revisions. Corresponds to one of the PRID_* revision IDs. uint32 eRevId; ///< GPU emulation/internal revision ID. AsicRevision revision; ///< ASIC revision. GpuType gpuType; ///< Type of GPU (discrete vs. integrated) uint16 gpuPerformanceCapacity; ///< Portion of GPU assigned in virtualized system (SRIOV) ///< 0-65535, 0 invalid (not virtualized), 1 min, 65535 max GfxIpLevel gfxLevel; ///< IP level of this GPU's GFX block #if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 888 OssIpLevel ossLevel; ///< IP level of this GPU's OSS block VceIpLevel vceLevel; ///< IP level of this GPU's VCE block UvdIpLevel uvdLevel; ///< IP level of this GPU's UVD block #endif VcnIpLevel vcnLevel; ///< IP level of this GPU's VCN block #if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 888 SpuIpLevel spuLevel; ///< IP level of this GPU's SPU block #endif PspIpLevel pspLevel; ///< IP level of this GPU's PSP block uint32 gfxStepping; ///< Stepping level of this GPU's GFX block IpTriple gfxTriple; ///< Full GFX IP level (major.minor.step) of this GPU char gpuName[MaxDeviceName]; ///< Null terminated string identifying the GPU. uint32 gpuIndex; ///< Device's index in a linked adapter chain. uint32 maxGpuMemoryRefsResident; ///< Maximum number of GPU memory references that can be resident /// at any time. Memory references set both via IQueue and IDevice /// (via AddGpuMemoryReferences() or Submit()) count against this limit. uint64 timestampFrequency; ///< Frequency of the device's timestamp counter in Hz. 
/// @see ICmdBuffer::CmdWriteTimestamp. uint32 attachedScreenCount; ///< Number of screen attached to the device. uint32 maxSemaphoreCount; ///< Queue semaphores cannot have a signal count higher than this value. /// For example, one indicates that queue semaphores are binary. PalPublicSettings settings; ///< Public settings that the client has the option of overriding #if PAL_CLIENT_EXAMPLE AddrHandle hAddrlib; ///< Handle to addrlib for directed image tests. #endif struct { union { struct { /// This engine supports timestamps (ICmdBuffer::CmdWriteTimestamp()). uint32 supportsTimestamps : 1; /// This engine supports ICmdBuffer::CmdSetPredication() based on Streamout/Occlusion query uint32 supportsQueryPredication : 1; /// This engine supports ICmdBuffer::CmdSetPredication() based on a 32-bit GPU memory allocation uint32 supports32bitMemoryPredication : 1; /// This engine supports ICmdBuffer::CmdSetPredication() based on a 64-bit GPU memory allocation uint32 supports64bitMemoryPredication : 1; /// This engine supports ICmdBuffer::If(), Else() and EndIf() calls. uint32 supportsConditionalExecution : 1; /// This engine supports ICmdBuffer::While() and EndWhile() calls. uint32 supportsLoopExecution : 1; /// This engine supports ICmdBuffer::CmdWaitRegisterValue(), WaitMemoryValue() and /// CopyRegisterToMemory() calls. uint32 supportsRegMemAccess : 1; /// This engine supports ICmdBuffer::CmdCopyImage() between optimally tiled images with /// mismatched tiling tokens. uint32 supportsMismatchedTileTokenCopy : 1; /// This engine supports ICmdBuffer::Barrier() calls that transition out of the @ref /// LayoutUninitializedTarget layout. uint32 supportsImageInitBarrier : 1; /// This engine supports ICmdBuffer::Barrier() calls that transition out of the @ref /// LayoutUninitializedTarget layout for individual subresources. If this is not set and /// supportsImageInitBarrier is set, the subresource range must span the entire image. 
uint32 supportsImageInitPerSubresource : 1; /// This engine does not support any virtual memory features. IQueue::RemapVirtualMemoryPages and /// IQueue::CopyVirtualPageMappings are not supported on Queues using this engine. uint32 runsInPhysicalMode : 1; /// Indicates whether this engine can do virtual memory remap or not. uint32 supportVirtualMemoryRemap : 1; #if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 914 /// Indicates whether this Queues using this engine can maintain the contents of CE RAM across /// consecutive submissions. If this is not set, the client must not specify a nonzero value for /// either @ref QueueCreateInfo::persistentCeRamSize or @ref QueueCreateInfo::persistentCeRamOffset. uint32 supportPersistentCeRam : 1; #else uint32 reserved914 : 1; #endif /// Indicates whether the engine supports the command allocator tracks which chunk is idle. uint32 supportsTrackBusyChunks : 1; /// Indicates whether the engine can safely access non-resident ranges of resources. uint32 supportsUnmappedPrtPageAccess : 1; /// This engine supports clear or copy with MSAA depth-stencil destination uint32 supportsClearCopyMsaaDsDst : 1; /// Reserved for future use. uint32 reserved : 16; }; uint32 u32All; ///< Flags packed as 32-bit uint. } flags; ///< Engines property flags. struct { union { struct { uint32 exclusive : 1; ///< Engine is exclusively owned by one client at a time. uint32 mustUseDispatchTunneling : 1; ///< Queues created on this engine must use dispatch /// tunneling. /// Indicates whether this engine instance can be used for gang submission workloads via /// a multi-queue. /// @see IDevice::CreateMultiQueue. uint32 supportsMultiQueue : 1; uint32 hwsEnabled : 1; uint32 isHighPriority : 1; uint32 reserved : 27; ///< Reserved for future use. }; uint32 u32All; ///< Flags packed as 32-bit uint. } flags; ///< Capabilities property flags. 
uint32 queuePrioritySupport; ///< Mask of QueuePrioritySupport flags indicating which queue /// priority levels are supported by this engine. uint32 dispatchTunnelingPrioritySupport; ///< Mask of QueuePrioritySupport flags indicating which queue /// priority levels support dispatch tunneling on this engine. uint32 maxFrontEndPipes; ///< Up to this number of IQueue objects can be consumed in /// parallel by the front-end of this engine instance. It will /// only be greater than 1 on hardware scheduled engine backed /// by multiple hardware pipes/threads. } capabilities[MaxAvailableEngines]; ///< Lists each engine of this type (up to engineCount) and their /// properties. uint32 engineCount; ///< Number available engines of this type. uint32 queueSupport; ///< Mask of QueueTypeSupport flags indicating which queues are /// supported by this engine. uint32 maxBorderColorPaletteSize; ///< Maximum size of a border color palette on this engine. uint32 controlFlowNestingLimit; ///< Maximum depth of command-buffer control flow nesting on this /// engine. #if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 914 uint32 ceRamSizeAvailable; ///< Size, in bytes, of constant engine RAM available on this engine. #endif Extent3d minTiledImageCopyAlignment; ///< Minimum alignments (pixels) for X/Y/Z/Width/Height/Depth for /// ICmdBuffer::CmdCopyImage() between optimally tiled images. Extent3d minTiledImageMemCopyAlignment; ///< Minimum alignments (bytes) for X/Y/Z/Width/Height/Depth for /// ICmdBuffer::CmdCopyImage() with an optimally tiled image and a /// linearly tiled image. Also applies to /// ICmdBuffer::CmdCopyImageToMemory() or /// ICmdBuffer::CmdCopyMemoryToImage() with an optimally tiled image. Extent3d minLinearMemCopyAlignment; ///< Minimum alignments (bytes) for X/Y/Z/Width/Height/Depth for /// ICmdBuffer::CmdCopyTypedBuffer(). 
uint32 minTimestampAlignment; ///< If supportsTimestamps is set, this is the minimum address alignment /// in bytes of the dstOffset in ICmdBuffer::CmdWriteTimestamp(). uint32 maxNumDedicatedCu; ///< The maximum number of dedicated CUs for the real time audio queue uint32 maxNumDedicatedCuPerQueue; ///< The maximum number of dedicated CUs per queue uint32 dedicatedCuGranularity; ///< The granularity at which compute units can be dedicated to a queue /// Specifies the suggested heap preference clients should use when creating an @ref ICmdAllocator that will /// allocate command space for this engine type. These heap preferences should be specified in the allocHeap /// parameter of @ref CmdAllocatorCreateInfo. Clients are free to ignore these defaults and use their own /// heap preferences, but may suffer a performance penalty. GpuHeap preferredCmdAllocHeaps[CmdAllocatorTypeCount]; /// Indicate which queue supports per-command, per-submit, or per-queue TMZ based on the queue type. TmzSupportLevel tmzSupportLevel; } engineProperties[EngineTypeCount]; ///< Lists available engines on this device and their properties. struct { union { struct { /// This queue supports IQueue::PresentSwapChain() calls. Note that a queue may support swap chain /// presents even if the supportedDirectPresentModes flags below indicate no support for direct /// presents; instead swap chain PresentMode support is queried via GetSwapChainInfo. uint32 supportsSwapChainPresents : 1; uint32 reserved744 : 1; #if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 927 /// Set if the queue supports additional split barrier feature on top of basic acquire/release /// interface support. This provides CmdAcquire() and CmdRelease() to implement split barriers. #if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 893 /// Note: supportReleaseAcquireInterface is a prerequisite to supportSplitReleaseAcquire. #endif uint32 supportSplitReleaseAcquire : 1; #else uint32 reserved927 : 1; #endif /// Reserved for future use. 
uint32 reserved : 29; }; uint32 u32All; ///< Flags packed as 32-bit uint. } flags; ///< Queue property flags. uint32 supportedDirectPresentModes; ///< A mask of PresentModeSupport flags indicating support for various /// PresentModes when calling IQueue::PresentDirect(). } queueProperties[QueueTypeCount]; ///< Lists the properties of all queues supported by PAL. struct { union { struct { /// Indicates support for virtual GPU memory allocations. @see IQueue::RemapVirtualMemoryPages. uint32 virtualRemappingSupport : 1; /// Indicates support for pinning system memory for access as GPU memory. /// @see IDevice::PinSystemMemory. uint32 pinningSupport : 1; /// Indicates support pinned memory which is host-mapped from foreign device. uint32 supportHostMappedForeignMemory : 1; /// Indicates whether specifying memory references at Submit time is supported. If not supported /// all memory references must be manged via IDevice or IQueue AddGpuMemoryReferences() uint32 supportPerSubmitMemRefs : 1; /// Indicates support for GPU virtual addresses that are visible to all devices. uint32 globalGpuVaSupport : 1; /// Indicates support for Shared Virtual Memory VA range. uint32 svmSupport : 1; /// Indicates support for shadow desc VA range. uint32 shadowDescVaSupport : 1; /// Indicates support for IOMMUv2. Fine grain SVM is not supported without IOMMU. /// PAL client needs to check this flag before using fine grain SVM. /// IOMMU is a memory management unit (MMU) that connects a direct-memory-access-capable /// (DMA-capable) I/O bus to the main memory. uint32 iommuv2Support : 1; /// Indiciates that the platform supports automatic GPU memory priority management. uint32 autoPrioritySupport : 1; /// Indicates KMD has enabled HBCC(High Bandwidth Cache Controller) page migration support. This means /// shaders must be compiled such that all memory clauses can be replayed in response to an XNACK. 
uint32 pageMigrationEnabled : 1; /// Indicates TMZ (or HSFB) protected memory allocations are supported. uint32 supportsTmz : 1; /// Memory allocations on this device support MALL (memory access last level); essentially /// the lowest level cache possible. uint32 supportsMall : 1; /// Support for querying page fault information uint32 supportPageFaultInfo : 1; /// Indicates if this device supports GFX12-style distributed compression. Client can control /// whether distributed compression is enabled or not per IGpuMemory object using the /// distributedCompression field in @ref GpuMemoryCreateInfo. uint32 supportDistributedCompression : 1; /// All GPU memory allocations in all heaps are always initialized to zero on creation. /// The @ref initializeToZero flag is not needed when this is true. uint32 alwaysInitializedToZero : 1; /// Reserved for future use. uint32 reserved : 17; }; uint32 u32All; ///< Flags packed as 32-bit uint. } flags; ///< GPU memory property flags. gpusize realMemAllocGranularity; ///< The addresses and sizes of "real" GPU memory objects must be aligned /// to at least this many bytes. gpusize virtualMemAllocGranularity; ///< The addresses and sizes of virtual GPU memory objects must be aligned /// to at least this many bytes. gpusize virtualMemPageSize; ///< Size in bytes of a virtual GPU memory page. /// @see IQueue::RemapVirtualMemoryPages. gpusize fragmentSize; ///< Size in bytes of a video memory fragment. If GPU memory object /// addresses and sizes are aligned to at least this value, VA translation /// will be a bit faster. It is aligned to the allocation granularities. gpusize largePageSizeInBytes; ///< The large page optimization will allow compatible allocations to /// potentially be upgraded to a page size larger than 64KiB to reduce TLB /// pressure. PAL will automatically pad the size and alignment of some /// allocations to enable this optimization; /// see largePageMinSizeForAlignmentInBytes in PalPublicSettings. 
gpusize maxVirtualMemSize; ///< Total virtual GPU memory available (total VA space size). gpusize maxPhysicalMemSize; ///< Total VRAM available (Local + Invisible + non-Local heap sizes). gpusize vaStart; ///< Starting address of the GPU's virtual address space. gpusize vaEnd; ///< Ending address of the GPU's virtual address space. gpusize descTableVaStart; ///< Starting address of the descriptor table's virtual address space gpusize shadowDescTableVaStart; ///< Starting address of the shadow descriptor table's virtual address space gpusize privateApertureBase; ///< Private memory base address for generic address space (Windows only). gpusize sharedApertureBase; ///< Shared memory base address for generic address space (Windows only). gpusize busAddressableMemSize; ///< SDI/DirectGMA GPU aperture size set in CCC gpusize maxLocalMemSize; ///< Total VRAM available on the GPU (Local + Invisible heap sizes). LocalMemoryType localMemoryType; ///< Type of local memory used by the GPU. gpusize maxCaptureReplaySize; ///< Total virtual GPU available for Capture/Replay gpusize barSize; ///< Total VRAM which can be accessed by the CPU. struct { float maxMemClock; ///< Maximum GPU memory clock in MHz. For DX builds this value is valid only after /// the device has been finalized. uint32 memPerfRating; ///< Precomputed performance rating of memory operations. uint32 vramBusBitWidth; ///< Memory bus width. uint32 memOpsPerClock; ///< Memory operations per clock. } performance; ///< Performance-related memory properties. } gpuMemoryProperties; ///< Memory properties for this device. struct { union { struct { /// Images created on this device supports AQBS stereo mode, this AQBS stereo mode doesn't apply to the /// array-based stereo feature supported by Presentable images. uint32 supportsAqbsStereoMode : 1; /// Set if images created on this device support being created with corner sampling. uint32 supportsCornerSampling : 1; /// Load Bearing placeholder, do not touch. 
uint32 placeholder0 : 1; /// Reserved for future use. uint32 reserved : 29; }; uint32 u32All; ///< Flags packed as 32-bit uint. } flags; ///< GPU memory property flags. Extent3d maxDimensions; ///< Maximum supported width/height/depth for an image. uint32 maxArraySlices; ///< Maximum supported number of array slices for a 1D or 2D image. PrtFeatureFlags prtFeatures; ///< PRT features supported by the hardware. gpusize prtTileSize; ///< Size, in bytes, of a PRT tile. MsaaFlags msaaSupport; ///< Bitflags for MSAA sample/fragment count support. uint8 maxMsaaFragments; ///< Max number of MSAA fragments per pixel (may have more samples). uint8 numSwizzleEqs; ///< How many swizzle equations are in pSwizzleEqs. Extent2d vrsTileSize; ///< Pixel dimensions of a VRS tile. 0x0 indicates image-based shading rate /// is not supported. const SwizzleEquation* pSwizzleEqs; ///< These describe how to interpret device-dependent tiling modes. bool tilingSupported[static_cast(ImageTiling::Count)]; ///< If each image tiling is supported. } imageProperties; ///< Image properties for this device. struct { /// Maximum number of available shader-accessible user data entries. @see PipelineShaderInfo. uint32 maxUserDataEntries; uint32 maxThreadGroupSize; ///< Per-device limit on threads per threadgroup for compute shaders. /// Some hardware supported by PAL has a bug which can cause a GPU hang if async compute enginesare used while /// compute shaders with > maxAsyncComputeThreadGroupSize are in flight on any queue. This reports the /// maximum "safe" limit on threads per threadgroup for compute shaders for this device if the client wishes to /// use async compute engines. 
Note that the bug can occur if the following conditions are met: /// (a) Async compute workloads are running *somewhere* on the GPU, in any process; /// (b) Some compute workloads on either the async compute engine or on the universal engine have a threads per /// threadgroup amount which exceeds maxAsyncComputeThreadGroupSize. /// /// It is up to the client to choose how to work around this bug. They are free to either limit applications to /// only creating compute shaders with <= maxAsyncComputeThreadGroupSize threads per group, or to avoid using /// the async compute engines at all. /// /// If this value equals maxThreadGroupSize, then the device does not have this bug and the client can use /// any compute shader on any queue. uint32 maxAsyncComputeThreadGroupSize; uint32 maxComputeThreadGroupCountX; ///< Maximum number of thread groups supported uint32 maxComputeThreadGroupCountY; ///< Maximum number of thread groups supported uint32 maxComputeThreadGroupCountZ; ///< Maximum number of thread groups supported uint32 maxBufferViewStride; ///< Maximum stride, in bytes, that can be specified in a buffer view. uint32 hardwareContexts; ///< Number of distinct state contexts available for graphics workloads. Mostly /// irrelevant to clients, but may be useful to tools. #if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 914 uint32 ceRamSize; ///< Maximum on-chip CE RAM size in bytes. #endif uint32 maxPrimgroupSize; ///< Maximum primitive group size. uint32 supportedVrsRates; ///< Bitmask of VrsShadingRate enumerations indicating which modes are supported. uint32 mallSizeInBytes; ///< Size of total MALL (Memory Attached Last Level - L3) cache in bytes. uint32 gl2UncachedCpuCoherency; ///< If supportGl2Uncached is set, then this is a bitmask of all /// CacheCoherencyUsageFlags that will be coherent with CPU reads/writes. /// Note that reporting CoherShader only means that GLC accesses will be /// CPU coherent. /// Note: Only valid if @ref supportGl2Uncached is true. 
uint32 maxGsOutputVert; ///< Maximum number of GS output vertices. uint32 maxGsTotalOutputComponents; ///< Maximum number of GS output components totally. uint32 maxGsInvocations; ///< Maximum number of GS prim instances, corresponding to geometry shader /// invocation in glsl. RayTracingIpLevel rayTracingIp; ///< HW RayTracing IP version uint32 cpUcodeVersion; ///< Command processor feature version. uint32 pfpUcodeVersion; ///< Command processor, graphics prefetch firmware version. union { struct { uint64 support8bitIndices : 1; ///< Hardware natively supports 8bit indices uint64 support16BitInstructions : 1; ///< Hardware supports FP16 and INT16 instructions uint64 supportBorderColorSwizzle : 1; ///< Hardware supports border color swizzle uint64 supportDoubleRate16BitInstructions : 1; ///< Hardware supports double rate packed math uint64 supportFp16Fetch : 1; ///< Hardware supports FP16 texture fetches uint64 supportFp16Dot2 : 1; ///< Hardware supports a paired FP16 dot product. uint64 supportConservativeRasterization : 1; ///< Hardware supports conservative rasterization uint64 supportImplicitPrimitiveShader : 1; ///< Device supports implicit compiling of the /// hardware vertex shader as a primitive shader to /// perform culling and compaction optimizations in /// the shader. uint64 supportMeshShader : 1; ///< Indicates support for mesh shaders. uint64 supportTaskShader : 1; ///< Indicates support for task shaders. uint64 supportMsFullRangeRtai : 1; ///< HW supports full range render target array /// index for Mesh Shaders. uint64 supportPrtBlendZeroMode : 1; ///< Blend zero mode support. uint64 supports2BitSignedValues : 1; ///< Hardware natively supports 2-bit signed values. uint64 supportPrimitiveOrderedPs : 1; ///< Hardware supports primitive ordered UAV /// accesses in the PS. uint64 supportPatchTessDistribution : 1; ///< Hardware supports patch level tessellation /// distribution among VGTs. 
uint64 supportDonutTessDistribution : 1; ///< Hardware supports donut granularity of /// tessellation distribution among VGTs. uint64 supportTrapezoidTessDistribution : 1; ///< Hardware supports trapezoid granularity of /// tessellation distribution among VGTs. uint64 supportSingleChannelMinMaxFilter : 1; ///< Hardware supports min/max filtering that can /// return one channel at a time. uint64 supportPerChannelMinMaxFilter : 1; ///< Hardware returns min/max value on a per-channel /// basis. uint64 supportRgpTraces : 1; ///< Hardware supports RGP traces. uint64 supportMsaaCoverageOut : 1; ///< Set if HW supports MSAA coverage feature uint64 supportPostDepthCoverage : 1; ///< Set if HW supports post depth coverage feature uint64 supportSpiPrefPriority : 1; ///< Set if HW supports preference priority. uint64 supportWaveBreakSize : 1; ///< The HW supports specifying the wavebreak size /// in the pixel shader pipeline. uint64 supportsPerShaderStageWaveSize : 1; ///< If set, the "waveSize" setting in the /// @ref PipelineShaderInfo structure is meaningful. uint64 placeholder2 : 1; ///< Placeholder for backward compatibility, no use it. uint64 supportSpp : 1; ///< Hardware supports Shader Profiling for Power. uint64 timestampResetOnIdle : 1; ///< GFX timestamp resets after idle between /// submissions. The client cannot assume that /// timestamps will increase monotonically across /// command buffer submissions. uint64 support1xMsaaSampleLocations : 1; ///< HW supports 1xMSAA custom quad sample patterns #if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 893 uint64 supportReleaseAcquireInterface : 1; ///< Set if HW supports the basic functionalities of /// acquire/release-based barrier interface. This /// provides CmdReleaseThenAcquire() as a convenient /// way to replace the legacy barrier interface's /// CmdBarrier() to handle single point barriers. #else uint64 placeholder4 : 1; ///< Placeholder for backward compatibility, no use it. 
#endif #if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 883 uint64 supportSplitReleaseAcquire : 1; ///< Set if HW supports additional split barrier feature /// on top of basic acquire/release interface support. /// This provides CmdAcquire() and CmdRelease() to /// implement split barriers. /// Note: supportReleaseAcquireInterface is a /// prerequisite to supportSplitReleaseAcquire. #else uint64 placeholder3 : 1; ///< Placeholder for backward compatibility, no use it. #endif uint64 supportGl2Uncached : 1; ///< Indicates support for the allocation of GPU L2 /// un-cached memory. @see gl2UncachedCpuCoherency uint64 supportOutOfOrderPrimitives : 1; ///< HW supports higher throughput for out of order uint64 supportIntersectRayBarycentrics : 1; ///< HW supports the ray intersection mode which /// returns triangle barycentrics. uint64 supportFloat32BufferAtomics : 1; ///< Hardware supports float32 buffer atomics uint64 supportFloat32ImageAtomics : 1; ///< Hardware supports float32 image atomics uint64 supportFloat32BufferAtomicAdd : 1; ///< Hardware supports float32 buffer atomic add uint64 supportFloat32ImageAtomicAdd : 1; ///< Hardware supports float32 image atomic add uint64 supportFloat64Atomics : 1; ///< Hardware supports float64 atomics uint64 supportFloat32ImageAtomicMinMax : 1; ///< Hardware supports float32 image atomic min and max uint64 supportFloat64BufferAtomicMinMax : 1; ///< Hardware supports float64 buffer atomic min and max uint64 supportFloat64SharedAtomicMinMax : 1; ///< Hardware supports float64 shared atomic min and max uint64 support64BitInstructions : 1; ///< Hardware supports 64b instructions uint64 supportShaderSubgroupClock : 1; ///< HW supports clock functions across subgroup. uint64 supportShaderDeviceClock : 1; ///< HW supports clock functions across device. 
uint64 supportAlphaToOne : 1; ///< HW supports forcing PS output alpha channel to 1 uint64 supportCaptureReplay : 1; ///< HW supports captureReplay uint64 supportSortAgnosticBarycentrics : 1; ///< HW supports sort-agnostic Barycentrics for PS uint64 supportVrsWithDsExports : 1; ///< If true, asic support coarse VRS rates /// when z or stencil exports are enabled uint64 supportRayTraversalStack : 1; ///< HW assisted ray tracing traversal stack support uint64 supportPointerFlags : 1; ///< Ray tracing HW supports flags embedded in the node /// pointer bits uint64 supportTextureGatherBiasLod : 1; ///< HW supports SQ_IMAGE_GATHER4_L_O uint64 supportInt8Dot : 1; ///< Hardware supports a dot product 8bit. uint64 supportInt4Dot : 1; ///< Hardware supports a dot product 4bit. uint64 supportMixedSignIntDot : 1; ///< Hardware supports a integer dot product with mixed /// sign inputs. uint64 support2DRectList : 1; ///< HW supports PrimitiveTopology::TwoDRectList. uint64 supportHsaAbi : 1; ///< PAL supports HSA ABI compute pipelines. uint64 supportImageViewMinLod : 1; ///< Indicates image srd supports min_lod. uint64 supportStaticVmid : 1; ///< Indicates support for static-VMID uint64 support3dUavZRange : 1; ///< HW supports read-write ImageViewSrds of 3D images /// with zRange specified. uint64 supportCooperativeMatrix : 1; ///< HW supports cooperative matrix uint64 support1dDispatchInterleave : 1; ///< Indicates support for 1D Dispatch Interleave. uint64 support2dDispatchInterleave : 1; ///< Indicates support for 2D Dispatch Interleave. uint64 supportBFloat16 : 1; ///< HW supports bf16 instructions. uint64 supportFloat8 : 1; ///< HW supports float 8-bit instructions. uint64 supportInt4 : 1; ///< HW supports integer 4-bit instructions. uint64 supportCooperativeMatrix2 : 1; ///< HW supports Gfx12 extension cooperative matrix. uint64 placeholder14 : 2; uint64 reserved : 59; ///< Reserved for future use. }; uint64 u64All[2]; ///< Flags packed as 32-bit uint. 
} flags; ///< Device IP property flags. struct { #if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 892 union { uint32 bufferView; #endif uint32 typedBufferView; ///< Size in bytes (and required alignment) of a typed buffer view SRD. /// @see IDevice::CreateTypedBufferViewSrds(). #if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 892 }; #endif uint32 untypedBufferView; ///< Size in bytes (and required alignment) of a untyped buffer view SRD. /// @see IDevice::CreateUntypedBufferViewSrds(). uint32 imageView; ///< Size in bytes (and required alignment) of an image view SRD. /// @see IDevice::CreateImageViewSrds(). uint32 fmaskView; ///< Size in bytes (and required alignment) of an fmask view SRD. /// @see IDevice::CreateFmaskViewSrds(). This value can be zero to denote /// a lack of fMask support. uint32 sampler; ///< Size in bytes (and required alignment) of a sampler SRD. /// @see IDevice::CreateSamplerSrds(). uint32 bvh; ///< Size in bytes (and required alignment) of a BVH SRD /// Will be zero if HW doesn't support ray-tracing capabilities. /// @see IDevice::CreateBvhSrds(). } srdSizes; ///< Sizes for various types of _shader resource descriptor_ (SRD). struct { const void* pNullBufferView; ///< Pointer to null buffer view srd const void* pNullImageView; ///< Pointer to null image view srd const void* pNullFmaskView; ///< Pointer to null fmask view srd. This pointer can be nullptr to /// indicate a lack of fMask support. const void* pNullSampler; ///< Pointer to null sampler srd } nullSrds; ///< Null SRDs are used to drop shader writes or read 0 struct { float maxGpuClock; ///< Maximum GPU engine clock in MHz. For DX builds this value is valid only after /// the device has been finalized. float aluPerClock; ///< Maximum shader ALU operations per clock. float texPerClock; ///< Maximum texture fetches per clock. float primsPerClock; ///< Maximum primitives processed per clock. float pixelsPerClock; ///< Maximum pixels processed per clock. 
uint32 gfxipPerfRating; ///< Precomputed performance rating of the GfxIp block. } performance; ///< Performance-related device properties. struct { union { struct { uint32 eccProtectedGprs : 1; ///< Whether or not the GPU has ECC protection ///< on its VGPR's uint32 placeholder0 : 1; uint32 reserved : 30; ///< Reserved for future use. }; uint32 u32All; ///< Flags packed as a 32-bit unsigned integer. } flags; uint32 numShaderEngines; ///< Number of non-harvested shader engines. uint32 numShaderArrays; ///< Number of shader arrays. uint32 numCusPerShaderArray; ///< Number of CUs per shader array that are actually usable. uint32 maxCusPerShaderArray; ///< Maximum number of CUs per shader array. Count of physical CUs prior to ///< harvesting CUs for yield in certain variants of ASICs (ex: Fiji PRO). uint32 numSimdsPerCu; ///< Number of SIMDs per compute unit. uint32 numWavefrontsPerSimd; ///< Number of wavefront slots in each SIMD. uint32 numActiveRbs; ///< Number of active Renderbackends uint32 nativeWavefrontSize; ///< The native wavefront size. uint32 minWavefrontSize; ///< The smallest supported wavefront size. uint32 maxWavefrontSize; ///< All powers of two between the min size and max size are supported. uint32 numAvailableSgprs; ///< Number of available SGPRs. uint32 sgprsPerSimd; ///< Number of physical SGPRs per SIMD. uint32 minSgprAlloc; ///< Minimum number of SGPRs that can be allocated by a wave. uint32 sgprAllocGranularity; ///< SGPRs are allocated in groups of this size. Meaning, if your shader /// only uses 1 SGPR, you will still end up reserving this number of /// SGPRs. uint32 numAvailableVgprs; ///< Number of available VGPRs. uint32 vgprsPerSimd; ///< Number of physical VGPRs per SIMD. uint32 minVgprAlloc; ///< Minimum number of VGPRs that can be allocated by a wave. uint32 vgprAllocGranularity; ///< VGPRs are allocated in groups of this size. Meaning, if your shader /// only uses 1 VGPR, you will still end up reserving this number of /// VGPRs. 
On hardware where wave32 is available, the granularity for a /// wave64 shader is half of this value, but the VGPR allocation is /// double. The same number of total physical registers is allocated for /// each unit of allocation with either wave size. uint32 ldsSizePerCu; ///< Local Data Store size available in bytes per CU. uint32 ldsSizePerThreadGroup; ///< Local Data Store size available in bytes per thread-group. uint32 ldsGranularity; ///< Local Data Store allocation granularity expressed in bytes. uint32 gsPrimBufferDepth; ///< Hardware configuration for the GS prim buffer depth. uint32 gsVgtTableDepth; ///< Hardware configuration for the GS VGT table depth. uint32 numOffchipTessBuffers; ///< Number of offchip buffers that are used for offchip tessellation to /// pass data between shader stages. uint32 offchipTessBufferSize; ///< Size of each buffer used for passing data between shader stages when /// tessellation passes data using off-chip memory. uint32 tessFactorBufSizePerSe; ///< Size of GPU's the tessellatio-factor buffer, per shader engine. uint32 tccSizeInBytes; ///< Size of total L2 TCC cache in bytes. uint32 tcpSizeInBytes; ///< Size of one L1 TCP cache in bytes. There is one TCP per CU. uint32 maxLateAllocVsLimit; ///< Maximum number of VS waves that can be in flight without /// having param cache and position buffer space. uint32 shaderPrefetchBytes; ///< Number of bytes the SQ will prefetch, if any. uint32 gl1cSizePerSa; ///< Size in bytes of GL1 cache per SA. uint32 instCacheSizePerCu; ///< Size in bytes of instruction cache per CU/WGP. uint32 scalarCacheSizePerCu; ///< Size in bytes of scalar cache per CU/WGP. uint32 numAvailableCus; ///< Total number of CUs that are actually usable. uint32 numPhysicalCus; ///< Count of physical CUs prior to harvesting. /// Mask of active pixel packers. The mask is 128 bits wide, assuming a max of 32 SEs and a max of 4 pixel /// packers (indicated by a single bit each) per SE. 
uint32 activePixelPackerMask[ActivePixelPackerMaskDwords]; /// Mask of present, non-harvested CUs (Virtual Layout) uint32 activeCuMask[MaxShaderEngines][MaxShaderArraysPerSe]; } shaderCore; ///< Properties of computational power of the shader engine. } gfxipProperties; struct { union { struct { uint32 supportTurboSync : 1; ///< Whether TurboSync is supported by KMD uint32 enableUmdFpsCap : 1; ///< Whether UMD FPS CAP enabled uint32 isCwgSupported : 1; ///< KMD supports Creator Who Game (CWG) feature uint32 isGamingDriver : 1; ///< KMD works in gaming mode #if defined(_WIN32) uint32 canShareSemaphoreKmtHandle : 1; ///< OS supports sharing queue semaphores by KMT handle. #else uint32 placeholder0 : 1; #endif uint32 ifhModeEnabled : 1; ///< Whether the IFH mode is enabled uint32 requireFrameEnd : 1; ///< If the client must tag the last command buffer /// submission in each frame with a @ref CmdBufInfo with /// the frameEnd flag set. uint32 supportNativeHdrWindowing : 1; ///< Support HDR presentation that does not require FSE. uint32 flipQueueSupportsDecodeDst : 1; ///< If set, Decode destination images are supported /// in the OS flip-queue. uint32 supportFreeMux : 1; ///< Whether FreeMux is supported by KMD uint32 isDataCenterBoard : 1; ///< Whether the current board in use is a Data Center board. /// This is meant to support a unified VDI/CG driver package. #if defined(__unix__) uint32 hasPrimaryDrmNode : 1; ///< Set if the device has a primary DRM node. uint32 hasRenderDrmNode : 1; ///< Set if the device has a render DRM node. #else uint32 placeholder1 : 2; #endif uint32 forceAlignmentSupported : 1; ///< If PalPublicSettings::hardwareBufferAlignmentMode /// has any effect. uint32 reserved : 18; ///< Reserved for future use. }; uint32 u32All; ///< Flags packed as 32-bit uint. } flags; ///< OS-specific property flags. #if (PAL_KMT_BUILD) int32 luidHighPart; ///< High 32 bits of the LUID (locally unique identifier) for this device. 
uint32 luidLowPart; ///< Low 32 bits of the LUID (locally unique identifier) for this device. bool supportNtHandle; ///< Support export/import resource with NT handle. bool supportNativefence; ///< Expose the capability of native fence support to client in DeviceProperties. #endif union { struct { uint32 support : 1; ///< Support Timeline type semaphore. uint32 supportHostQuery : 1; ///< Support Timeline type semaphore host query. uint32 supportHostWait : 1; ///< Support Timeline type semaphore host wait. uint32 supportHostSignal : 1; ///< Support Timeline type semaphore host signal. uint32 supportWaitBeforeSignal : 1; ///< Support Timeline type semaphore wait before signal. uint32 reserved : 27; ///< Reserved for future use. }; uint32 u32All; } timelineSemaphore; #if defined(__unix__) bool supportOpaqueFdSemaphore; ///< Support export/import semaphore as opaque fd in linux KMD. bool supportSyncFileSemaphore; ///< Support export/import semaphore as sync file in linux KMD. bool supportSyncFileFence; ///< Support export/import fence as sync file in linux KMD. #endif bool supportQueuePriority; ///< Support create queue with priority bool supportDynamicQueuePriority; ///< Support set the queue priority through IQueue::SetExecutionPriority #if (PAL_KMT_BUILD || PAL_AMDGPU_BUILD) bool supportMemoryBudgetQuery; ///< Support memory budget query through IDevice::QueryGpuMemoryBudgetInfo #endif #if defined(_WIN32) bool supportArbitaryPrtMapUnmap; ///< Support arbitary prt map unmap operation. 
#endif uint32 umdFpsCapFrameRate; ///< The frame rate of the UMD FPS CAP VirtualDisplayCapabilities virtualDisplayCaps; ///< Capabilities of virtual display, it's provided by KMD union { struct { uint32 supportDevice : 1; ///< GPU time domain uint32 supportClockMonotonic : 1; ///< POSIX CLOCK_MONOTONIC time domain uint32 supportClockMonotonicRaw : 1; ///< POSIX CLOCK_MONOTONIC_RAW time domain uint32 supportQueryPerformanceCounter : 1; ///< Windows Query Performance Counter time domain uint32 reserved : 28; ///< Reserved for future use. }; uint32 u32All; } timeDomains; #if defined(_WIN32) char driverStorePath[Util::MaxPathStrLen]; ///< Driver store path #endif #if defined(__unix__) int64 primaryDrmNodeMajor; ///< DRM primary node major number. int64 primaryDrmNodeMinor; ///< DRM primary node minor number. int64 renderDrmNodeMajor; ///< DRM render node major number. int64 renderDrmNodeMinor; ///< DRM render node minor number. #endif union { struct { uint32 supportPostflip : 1; ///< KMD support DirectCapture post-flip access uint32 supportPreflip : 1; ///< KMD support DirectCapture pre-flip access uint32 supportRSync : 1; ///< KMD support RSync uint32 maxFrameGenRatio : 4; ///< Maximum frame generation ratio or zero if not supported uint32 supportNonPrimary : 1; ///< KMD support non-primary DirectCapture auxiliary data uint32 reserved : 24; ///< Reserved for future use. }; uint32 u32All; } directCapture; } osProperties; ///< OS-specific properties of this device. struct { uint32 domainNumber; ///< PCI bus number. uint32 busNumber; ///< PCI bus number. uint32 deviceNumber; ///< PCI device number. uint32 functionNumber; ///< PCI function number. union { struct { uint32 gpuConnectedViaThunderbolt : 1; ///< Device is an externally housed GPU connected to the system /// via Thunderbolt. This will drastically impact CPU read and /// write performance of memory in the @ref GpuHeapLocal heap. 
uint32 gpuEmulatedInSoftware : 1; ///< Device is really a software package which emulates the /// GPU. This is meant for pre-silicon development. uint32 gpuEmulatedInHardware : 1; ///< Device is a hardware emulated GPU. This is meant for /// pre-silicon development. uint32 gpuVirtualization : 1; ///< Set if running under VM. uint32 atomicOpsSupported : 1; ///< Set if pcie atomic is supported. uint32 reserved : 27; ///< Reserved for future use. }; uint32 u32All; ///< Flags packed as 32-bit uint. } flags; ///< PCI bus property flags. } pciProperties; ///< PCI bus properties of this device. BigSoftwareReleaseInfo bigSoftwareReleaseInfo; ///< Big Software (BigSW) Release Version information }; /// Defines callback function to notify client of private screen changes. typedef void (PAL_STDCALL *TopologyChangeNotificationFunc)(void* pClient); /// Defines callback function to notify client of the private screen removal. typedef void (PAL_STDCALL *DestroyNotificationFunc)(void* pOwner); /// Specifies the private screen topology change notification data. struct PrivateScreenNotifyInfo { void* pClient; ///< Pointer to client, PAL use this pointer as parameter when PAL /// calls callback pfnOnTopology. TopologyChangeNotificationFunc pfnOnTopology; ///< Pointer to client provided function. PAL should call this when /// the topology change happens and let the client handle the change. DestroyNotificationFunc pfnOnDestroy; ///< Pointer to client provdided function. PAL should call this when /// a private screen object is to be destroyed. The pOwner data is /// passed at @ref IPrivateScreen::BindOwner() time. }; /// Specifies fullscreen frame metadata control flags. Used for the KMD to notify clients about which types of frame /// metadata it needs to send to KMD. The meaning depends on the context: /// - During device finalization, client can set the flags indicating the specified metadata 'is supported' by client. 
/// - During present, client can query these flags that indicate which metadata 'is enabled' currently so that the /// client should send them to the KMD. union FullScreenFrameMetadataControlFlags { struct { uint32 timerNodeSubmission : 1; ///< Timer node submission, used for cases such as FRTC/FP/PFPA. uint32 frameBeginFlag : 1; ///< FrameBegin flag on CmdBufInfo, see CmdBufInfo for details. uint32 frameEndFlag : 1; ///< FrameEnd flag on CmdBufInfo, see CmdBufInfo for details. uint32 primaryHandle : 1; ///< Pending primary handle for pre-flip primary access (PFPA) uint32 p2pCmdFlag : 1; ///< P2P copy command. See CmdBufInfo comments for details. uint32 forceSwCfMode : 1; ///< Force software crossfire mode. uint32 postFrameTimerSubmission : 1; ///< It indicates whether the timer node submission at frame N is to /// synchronize the flip of frame N (postFrameTimerSubmission == TRUE) /// or N+1 (postFrameTimerSubmission == FALSE). /// It's only valid when timerNodeSubmission is also set. uint32 useHp3dForDwm : 1; ///< KMD Informs (DX11) UMD to use HP3D for DWM or not (Output only). uint32 expandDcc : 1; ///< KMD notifies UMD to expand DCC (Output only). uint32 enableTurboSyncForDwm : 1; ///< Indicates DWM should turn on TurboSync(Output only). uint32 enableDwmFrameMetadata : 1; ///< When cleared, no frame metadata should be sent for DWM(Output only). uint32 flipIntervalOverride : 3; ///< KMD-UMD interface FLIP_INTERVAL_OVERRIDE, for KMD to request flip /// interval override from UMD. uint32 disableFreeMux : 1; ///< KMD notifies UMD to disable FreeMux. uint32 maxFrameLatency : 2; ///< KMD can notify UMD to override the frame latency of an app. 
uint32 sendMotionVectors : 1; ///< Send the motion vector in CmdBufInfo once per frame uint32 sendDepth : 1; ///< Send the depth buffer in CmdBufInfo once per frame uint32 sendCameraMatrix : 1; ///< Send the camera matrix in CmdBufInfo once per frame uint32 sendHudLessImage : 1; ///< Send the HUD less image in CmdBufInfo once per frame uint32 reserved : 11; ///< Reserved for future use. }; uint32 u32All; ///< Flags packed as 32-bit uint. }; /// Indicates the desired UMD behavior with timer node submission. /// This is used to distinguish FP Vsync On + FreeSync Off case from HSync or FreeSync cases, the former case doesn't /// hold flip while the later cases do. enum class TimerNodeMode : uint32 { Unspecified, ///< Unspecified, client can decide what to do with the timer submission. ForceFlipHold, ///< Client must hold flip with the timer submission }; /// Specifies fullscreen frame metadata control data. Including FullScreenFrameMetadataControlFlags plus extended data. /// According to KMD's design, the difference is that 'flags' can be used to indicate 'client caps' during device /// initialization, while the 'data' is only passed from KMD to UMD. struct PerSourceFrameMetadataControl { FullScreenFrameMetadataControlFlags flags; ///< The frame metadata control flags TimerNodeMode timerNodeSubmissionMode; ///< Desired UMD behavior with timer node submission }; /// Specifies the texture optimization level to use for an image. /// /// @ingroup ResourceBinding enum class ImageTexOptLevel : uint32 { Default = 0, ///< Use device default setting Disabled, ///< Disable texture filter optimization Enabled, ///< Enable texture filter optimization Maximum, ///< Maximum texture filter optimization Count }; /// Specifies properties for @ref IDevice finalization. Input structure to IDevice::Finalize(). struct DeviceFinalizeInfo { union { struct { uint32 supportPrivateScreens : 1; ///< Initializes private screen support. 
uint32 requireFlipStatus : 1; ///< Requires to initialize flip status shared memory uint32 requireFrameMetadata : 1; ///< Requires to initialize frame metadata flags shared memory. /// Clients should only set this flag on the master device in an /// LDA chain. uint32 internalGpuMemAutoPriority : 1; ///< Forces internal GPU memory allocation priorities to be /// determined automatically. It is an error to set this flag /// if the device does not report that it supports this feature. uint32 reserved : 28; ///< Reserved for future use. }; uint32 u32All; ///< Flags packed as 32-bit uint. } flags; ///< Device finalization flags. /// Specifies which engines of each type should be created for the device. struct { uint32 engines; ///< A mask of which engines are requested. } requestedEngineCounts[EngineTypeCount]; #if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 914 /// Bytes of CE RAM to be used by the client for each engine type. This value must be <= ceRamSizeAvailable reported /// for that engine type. In the case where more than one engine of a given type is requested it is assumed each /// engine of that type will use this amount of CE RAM so the total size of (ceRamSizeUsed * queueCounts) must be <= /// ceRamSizeAvailable for that engine type. Each entry must be either zero or a multiple of 32 bytes. size_t ceRamSizeUsed[EngineTypeCount]; #endif /// @see PrivateScreenNotifyInfo /// Private screen notify info, must be filled when supportPrivateScreens=1. The client pointer and callback are to /// be saved in device. PAL should call the callback when there is any topology (hotplug) change with the client /// pointer as parameter. PrivateScreenNotifyInfo privateScreenNotifyInfo; /// Fullscreen frame metadata control flags indicating the types of metadata that the client supports. 
/// During adapter initialization, capable KMD notifies clients that it supports frame metadata, /// clients should then set these flags on device finalization info, indicating which types of metadata the client /// supports. FullScreenFrameMetadataControlFlags supportedFullScreenFrameMetadata; /// Specify the texture optimization level which only applies to internally-created views by PAL (e.g., for BLTs), /// client-created views must use the texOptLevel parameter in ImageViewInfo. ImageTexOptLevel internalTexOptLevel; }; /// Reports the compatibility and available features when using two particular devices in a multi-GPU system. Output /// structure from IDevice::GetMultiGpuCompatibility(). struct GpuCompatibilityInfo { union { struct { uint32 gpuFeatures : 1; ///< The devices have an exact feature match: same internal tiling, same /// pipeline binary data, etc. uint32 iqMatch : 1; ///< Devices produce images with same precision. uint32 peerTransferWrite : 1; ///< Peer-to-peer transfers write are supported. See /// IDevice::OpenPeerMemory() and IDevice::OpenPeerImage(). uint32 peerTransferRead : 1; ///< Peer-to-peer transfers based on xmgi are supported. /// See IDevice::OpenPeerMemory() and IDevice::OpenPeerImage(). uint32 sharedMemory : 1; ///< Devices can share memory objects with. IDevice::OpenSharedMemory(). uint32 sharedSync : 1; ///< Devices can share queue semaphores with /// IDevice::OpenSharedQueueSemaphore(). uint32 shareThisGpuScreen : 1; ///< Either device can present to this device. Means that the device /// indicated by the otherDevice param in /// IDevice::GetMultiGpuCompatibility() can present to the device the /// method was called on. uint32 shareOtherGpuScreen : 1; ///< Either device can present to the other device. Means that the /// device IDevice::GetMultiGpuCompatibility() was called on can present /// to the GPU indicated by the otherGpu param. 
uint32 peerEncode : 1; ///< whether encoding HW can access FB memory of remote GPU in chain uint32 peerDecode : 1; ///< whether decoding HW can access FB memory of remote GPU in chain uint32 peerTransferProtected : 1; ///< whether protected content can be transferred over P2P uint32 crossGpuCoherency : 1; ///< whether remote FB memory can be accessed without need for cache flush uint32 reserved : 20; ///< Reserved for future use. }; uint32 u32All; ///< Flags packed as 32-bit uint. } flags; ///< GPU compatibility flags. }; /// Reports properties of a GPU memory heap. /// /// @note The performance ratings represent an approximate memory throughput for a particular access scenario, but /// should not be taken as an absolute performance metric. struct GpuMemoryHeapProperties { union { struct { uint32 cpuVisible : 1; ///< Accessible with IGpuMemory::Map() uint32 cpuGpuCoherent : 1; ///< Cache coherent between the CPU and GPU. uint32 cpuUncached : 1; ///< Not cached by CPU, but could still be GPU cached. uint32 cpuWriteCombined : 1; ///< CPU write-combined memory. uint32 holdsPinned : 1; ///< GPU memory objects created by IDevice::CreatePinnedGpuMemory() are in /// this heap. uint32 shareable : 1; ///< GPU memory objects in this heap can be shared between multiple devices. uint32 supportsTmz : 1; ///< This heap supports TMZ allocations. uint32 reserved : 25; ///< Reserved for future use. }; uint32 u32All; ///< Flags packed as 32-bit uint. } flags; ///< GPU memory heap property flags. gpusize logicalSize; ///< Size of the heap in bytes. If HBCC is enabled, certain heaps may be /// virtualized and the logical size will exceed the physical size. gpusize physicalSize; ///< Physical size of the heap in bytes }; /// Reports properties of a specific GPU block required for interpretting performance experiment data from that block. /// See @ref PerfExperimentProperties. struct GpuBlockPerfProperties { bool available; ///< If performance data is available for this block. 
uint32 instanceCount; ///< How many instances of this block are in the device. uint32 maxEventId; ///< Maximum event ID for this block. uint32 maxGlobalOnlyCounters; ///< Number of counters available only for global counts. uint32 maxGlobalSharedCounters; ///< Total counters available including state shared between global and SPM. uint32 maxSpmCounters; ///< Counters available for streaming only. /// If the instance group size is equal to one, every block instance has its own independent counter hardware. /// PAL guarantees this is true for all non-DF blocks. /// /// Otherwise the instance group size will be a value greater than one which indicates how many sequential /// instances share the same counter hardware. The client must take care to not enable too many counters within /// each of these groups. /// /// For example, the DfMall block may expose 16 instances with 8 global counters but define a group size of 16. /// In that case all instances are part of one massive group which uses one pool of counter state such that no /// combination of DfMall counter configurations can exceed 8 global counters. uint32 instanceGroupSize; }; /// Reports performance experiment capabilities of a device. Returned by IDevice::GetPerfExperimentProperties(). struct PerfExperimentProperties { PerfExperimentDeviceFeatureFlags features; ///< Performance experiment device features. size_t maxSqttSeBufferSize; ///< SQTT buffer size per shader engine. size_t sqttSeBufferAlignment; ///< SQTT buffer size and base address alignment. uint32 shaderEngineCount; ///< Number of shader engines. /// Reports availability and properties of each device block. GpuBlockPerfProperties blocks[static_cast(GpuBlock::Count)]; }; /// Reports maximum alignments for images created with a @ref ImageTiling::Linear tiling mode assuming the images' /// elements are no larger than maxElementSize. struct LinearImageAlignments { uint16 maxElementSize; ///< Maximum element size in bytes. 
uint16 baseAddress; ///< Minimum required base address alignment in bytes. uint16 rowPitch; ///< Minimum required row pitch alignment in bytes. uint16 depthPitch; ///< Minimum required depth pitch alignment in bytes. }; /// Specifies image view type (i.e., 1D, 2D, 3D, or cubemap). /// /// @ingroup ResourceBinding enum class ImageViewType : uint32 { Tex1d = 0x0, Tex2d = 0x1, Tex3d = 0x2, TexCube = 0x3, Count }; /// Enumeration which defines the mode for magnification and minification sampling /// /// @ingroup ResourceBinding enum XyFilter : uint32 { XyFilterPoint = 0, ///< Use single point sampling XyFilterLinear, ///< Use linear sampling XyFilterAnisotropicPoint, ///< Use anisotropic with single point sampling XyFilterAnisotropicLinear, ///< Use anisotropic with linear sampling XyFilterCount }; /// Enumeration which defines the mode for volume texture sampling /// /// @ingroup ResourceBinding enum ZFilter : uint32 { ZFilterNone = 0, ///< Disable Z filtering ZFilterPoint, ///< Use single point sampling ZFilterLinear, ///< Use linear sampling ZFilterCount }; /// Enumeration which defines the mode for mip-map texture sampling /// /// @ingroup ResourceBinding enum MipFilter : uint32 { MipFilterNone = 0, ///< Disable Mip filtering MipFilterPoint, ///< Use single point sampling MipFilterLinear, ///< Use linear sampling MipFilterCount }; /// Specifies parameters for an image view descriptor controlling how a given texture is sampled /// /// @ingroup ResourceBinding struct TexFilter { union { struct { uint32 magnification : 2; ///< Used with enum XyFilter for Plane magnification filtering uint32 minification : 2; ///< Used with enum XyFilter for Plane minification filtering uint32 zFilter : 2; ///< Used with enum ZFilter for volume texture filtering uint32 mipFilter : 2; ///< Used with enum MipFilter for mip-map filtering uint32 reserved : 24; ///< Reserved for future use }; uint32 u32All; ///< Value of flags bitfield }; }; /// Determines if "TexFilter" should be ignored 
or not. enum class TexFilterMode : uint32 { Blend = 0x0, ///< Use the filter method specified by the TexFilter enumeration Min = 0x1, ///< Use the minimum value returned by the sampler, no blending op occurs Max = 0x2, ///< Use the maximum value returned by the sampler, no blending op occurs Count }; /// Specifies how texture coordinates outside of texture boundaries are interpreted. /// /// @ingroup ResourceBinding enum class TexAddressMode : uint32 { Wrap = 0x0, ///< Repeat the texture. Mirror = 0x1, ///< Mirror the texture by flipping it at every other coordinate interval. Clamp = 0x2, ///< Clamp the texture to the texture's edge pixel. MirrorOnce = 0x3, ///< Mirror the texture once then clamp. ClampBorder = 0x4, ///< Clamp the texture to the border color specified in the sampler. MirrorClampHalfBorder = 0x5, ///< Mirror the texture once then clamp the texture to half of the edge color. ClampHalfBorder = 0x6, ///< Clamp the texture to half of the edge color. MirrorClampBorder = 0x7, ///< Mirror the texture once then clamp the texture to the samler's border color. Count }; /// Specifies how a border color should be chosen when the TexAddressClampBorder texture addressing is used by a /// sampler. /// /// @ingroup ResourceBinding enum class BorderColorType : uint32 { White = 0x0, ///< White border color (1.0, 1.0, 1.0, 1.0). TransparentBlack = 0x1, ///< Transparent black border color (0.0, 0.0, 0,0, 0.0). OpaqueBlack = 0x2, ///< Opaque black border color (0.0, 0.0, 0.0, 1.0). PaletteIndex = 0x3, ///< Fetch border color from the border color palette. Count }; /// Residency maps are helper surfaces used in conjunction with PRT+. They reflect the resident mip levels /// associated with a given UV region of the parent image. enum class PrtMapAccessType : uint32 { Raw = 0x0, ///< Read / write the map image as a normal image. 
Read = 0x1, ///< Read the residency map as floating point data WriteMin = 0x2, ///< Write the residency map with min(existing,new) WriteMax = 0x3, ///< Write the residency map with max(existing,new) WriteSamplingStatus = 0x4, ///< Write to the sampling status map. Count }; /// Specifies compression behavior for an IImage or image/buffer view. enum class CompressionMode : uint32 { Default = 0, ///< Let PAL choose behavior. ReadEnableWriteEnable = 1, ///< Override default to force read and write compression on. ReadEnableWriteDisable = 2, ///< Support reading compressed data, but force any writes to be uncompressed (keeping /// physical metadata consistent). ReadBypassWriteDisable = 3, ///< Bypass physical metadata on reads (assume decompressed), all writes will be /// uncompressed and will write physical metatdata marking updated blocks as being /// uncompressed. This mode is intended to handle placed resources that do not /// want compression in memory allocations that have distributed compression enabled. /// WARNING: Using this mode to read compressed data will result in corruption. Count, }; /// Specifies client compression behavior for an IImage. enum class ClientCompressionMode : uint32 { Default = 0, ///< Let implementation decide whether to enable or disable Enable = 1, ///< Force enable Disable = 2, ///< Force disable DisableClearOnly = 3, ///< Force enable for all image views except for image clears Count, }; /// Specifies parameters for a buffer view descriptor that control how a range of GPU memory is viewed by a shader. /// /// Input to either CreateTypedBufferViewSrds() or CreateUntypedBufferViewSrds(). Used for any buffer descriptor, /// including read-only shader resources, UAVs, vertex buffers, etc. The usage of stride and format depends on the /// expected shader instruction access: /// /// + _Typed buffer_ access must set a valid format and channel mapping. 
/// + _Raw buffer_ access is indicated by setting an invalid format and setting stride to 1. /// + _Structured buffer_ access is indicated by setting an invalid format and setting stride to any value except 1. A /// stride of 0 maps all view accesses to the first structure stored in memory. /// /// _Typed buffer_ SRD's must be created using @ref IDevice::CreateTypedBufferViewSrds(). /// _Raw buffer_ and _structured buffer_ SRD's must be created using @ref IDevice::CreateUntypedBufferViewSrds(). /// /// If necessary, PAL will adjust the out of bounds read/write behavior to match the client's API requirements based on /// the client defines - PAL_CLIENT_VULKAN, etc. /// /// @ingroup ResourceBinding struct BufferViewInfo { gpusize gpuAddr; ///< GPU memory virtual address where the buffer view starts, in bytes. /// Must be aligned to bytes-per-element for typed access. gpusize range; ///< Restrict the buffer view to this many bytes. Will be rounded down to a ///< multiple of the stride. gpusize stride; ///< Stride in bytes. Must be aligned to bytes-per-element for typed access. SwizzledFormat swizzledFormat; ///< Format and channel swizzle for typed access. Must be Undefined for structured /// or raw access. CompressionMode compressionMode; ///< Specify GFX12-style distributed compression mode override for this view. /// Only relevant if the backing memory pages enable compression. union { struct { /// Set to have this surface independently bypass the MALL for read and / or write operations. /// If set, this overrides the GpuMemMallPolicy specified at memory allocation time. Meaningful /// only on GPUs that have supportsMall set in DeviceProperties. uint32 bypassMallRead : 1; uint32 bypassMallWrite : 1; uint32 reserved : 30; ///< Reserved for future use }; uint32 u32All; ///< Value of flags bitfield } flags; }; /// D3D12_VERTEX_BUFFER_VIEW structure. Match vertex offset mode. /// @ref ICmdBuffer::CmdSetVertexBuffers function. 
struct VertexBufferView
{
    gpusize gpuva;          ///< GPU virtual address to vertex buffer.
    uint32  sizeInBytes;    ///< The size in bytes of the buffer.
    uint32  strideInBytes;  ///< The size in bytes of each vertex entry.
};

/// Specifies an array of vertex buffer view descriptors.  PAL supports multiple vertex buffer modes which change how
/// the GPU memory is viewed by a shader.  This struct wraps up the choice between these modes.
///
/// If offsetMode is true PAL will create "vertex offset mode" SRDs using the @ref VertexBufferView array.  This new SRD
/// has stronger robustness: it avoids dropping the last vertex when the buffer size is not a multiple of the stride.
///
/// @see ICmdBuffer::CmdSetVertexBuffers
struct VertexBufferViews
{
    uint32 firstBuffer;  ///< First vertex buffer slot to change.  Must be less than @ref MaxVertexBuffers.
    uint32 bufferCount;  ///< The length of the buffer view array.
    bool   offsetMode;   ///< If true, pVertexBufferViews must be used, otherwise pBufferViewInfos is used.

    union
    {
        const VertexBufferView* pVertexBufferViews;  ///< Offset mode vertex buffer views.  Valid if offsetMode is true.
        const BufferViewInfo*   pBufferViewInfos;    ///< Index mode vertex buffer views.  Valid if offsetMode is false.
    };
};

/// Specifies parameters for an image view descriptor controlling how a shader will view the specified image.
///
/// Input to CreateImageViewSrd().  Used for any image view descriptor, including read-only shader resources and UAVs.
///
/// @ingroup ResourceBinding
struct ImageViewInfo
{
    const IImage*  pImage;            ///< Image associated with the view.
    ImageViewType  viewType;          ///< 1D, 2D, 3D, or Cubemap.  Typically this should match the image type, but a
                                      ///  Cubemap view can be imposed on a 2D array image.
    SwizzledFormat swizzledFormat;    ///< Specifies the image view format and channel swizzle.  Must be compatible (same
                                      ///  bit-widths per channel) with the image's base format.
                                      ///  @note: YUV formats are invalid for an ImageView.  A format should be chosen to be
                                      ///         compatible with either the luma or chroma plane(s) of the YUV format.
    SubresRange    subresRange;       ///< Specifies a subset of subresources to include in the view.  If the base Image
                                      ///  has a YUV planar format, the number of array slices in the range must be 1.
                                      ///  If zRange feature is used, the number of mips in the range must be 1.
    float          minLod;            ///< Minimum mip level of detail to use for this view.

    uint32         samplePatternIdx;  ///< Index into the currently bound MSAA sample pattern palette to be
                                      ///  read/evaluated when samplepos shader instructions are executed on this
                                      ///  view.  Can be ignored if the samplepos shader instruction will not be used.
                                      ///  Must be less than MaxSamplePatternPaletteEntries.  See
                                      ///  IDevice::SetSamplePatternPalette().
    Range          zRange;            ///< Specifies the z offset and z range.

    ImageTexOptLevel texOptLevel;     ///< Specifies the texture optimization level.

    const IImage*    pPrtParentImg;   ///< Meaningful only if "mapAccess" is not "raw".
    PrtMapAccessType mapAccess;       ///< Type of access to be done if "pImage" is a PRT+ meta-data image.
                                      ///  See @ref ImageCreateInfo

    ImageLayout possibleLayouts;      ///< Union of all possible layouts this view can be in while accessed by this view.
                                      ///  (ie. what can be done with this SRD without having a layout transition?)
                                      ///  In DX, for example, it's possible that a texture SRV could be accessed in a state
                                      ///  with all other read-only usages allowed, but a UAV must exclusively be accessed
                                      ///  in the UNORDERED_ACCESS state.
                                      ///  The primary purpose of this flag is to avoid compressed shader writes if a
                                      ///  different usage does not support compression and PAL won't get an opportunity to
                                      ///  decompress it (ie. a transition in a barrier)

    CompressionMode compressionMode;  ///< Specify GFX12-style distributed compression mode override for this view.
                                      ///  Only relevant if the backing IImage resource and its bound memory pages
                                      ///  enable compression.  ReadBypassWriteDisable is only valid if compressionMode
                                      ///  in ImageCreateInfo disables compressed write.

    union
    {
        struct
        {
            /// Set to have this surface independently bypass the MALL for read and / or write operations.
            /// If set, this overrides the GpuMemMallPolicy specified at memory allocation time.  Meaningful
            /// only on GPUs that have supportsMall set in DeviceProperties.
            uint32 bypassMallRead  :  1;
            uint32 bypassMallWrite :  1;
            uint32 zRangeValid     :  1;  ///< Whether the z offset / range value is valid.
            uint32 includePadding  :  1;  ///< Whether internal padding should be included in the view range.
            uint32 reserved        : 28;  ///< Reserved for future use.
        };
        uint32 u32All;  ///< Value of flags bitfield.
    } flags;  ///< Image view flags.
};

/// Specifies parameters controlling execution of sample instructions in a shader.  Input to CreateSamplerSrd().
///
/// @ingroup ResourceBinding
struct SamplerInfo
{
    TexFilterMode   filterMode;               ///< Min/max filtering modes.
    TexFilter       filter;                   ///< Filtering to apply to texture fetches.
    TexAddressMode  addressU;                 ///< Addressing mode for U texture coords outside of the [0..1] range.
    TexAddressMode  addressV;                 ///< Addressing mode for V texture coords outside of the [0..1] range.
    TexAddressMode  addressW;                 ///< Addressing mode for W texture coords outside of the [0..1] range.
    float           mipLodBias;               ///< Bias for mipmap level of detail selection.
    uint32          maxAnisotropy;            ///< Anisotropy value clamp when the filter mode is TexFilterAnisotropic.
    CompareFunc     compareFunc;              ///< Comparison function to apply to fetched data.
    float           minLod;                   ///< High-resolution mipmap LOD clamp.
    float           maxLod;                   ///< Low-resolution mipmap LOD clamp.
    BorderColorType borderColorType;          ///< Selects border color when an address mode is
                                              ///  TexAddressMode::ClampBorder.
    uint32          borderColorPaletteIndex;  ///< Choose color from the border color palette when borderColorType is
                                              ///  BorderColorType::PaletteIndex.
    float           anisoThreshold;           ///< Opt-in, flags.useAnisoThreshold == 1 and flags.preciseAniso == 0.
                                              ///  The value should be computed taking into account the maxAnisotropy
                                              ///  setting.  This is a high resolution value which is quantized and
                                              ///  clamped down to 3 bits to the domain [0.0, 0.875] for current Hw.
                                              ///  We can interpret the functioning of the threshold value as follows.
                                              ///  maxAnisotropy per-pixel can be 1, 2, 4, 8 or 16 (N).
                                              ///  During sampling, the initial count (or S) is computed in Hw for each
                                              ///  quad and the domain for current Hw is [0-16].
                                              ///  Final sample count = min(pow(2, ceil(log2(S - anisoThreshold))), N)
                                              ///  Note: when flags.useAnisoThreshold == 0, Pal will ignore this value
                                              ///  and instead use a maximum of 0.25 at the highest anisotropic setting.
                                              ///  It is important to be aware that this feature tunes quality vs
                                              ///  performance, so care should be taken to not degrade image quality
                                              ///  'noticeably' when enabling this feature.
    uint32          perfMip;                  ///< Controls the value of the PERF_MIP field in Sampler SRD's.
                                              ///  This field controls how the fractional part of the LOD calculation
                                              ///  is handled.  If the LOD is fractional (e.g. 1.23), samples would
                                              ///  normally be averaged from both MIP 1 and MIP 2.  But if PERF_MIP
                                              ///  is set to nonzero the HW will perform an optimization and may fetch
                                              ///  from only 1 MIP.

    // These values are used to define a filtering line used when sampling a residency map.  The defined
    // slopes in both the X (U) and Y (V) directions are to avoid visible disconnects when sampling between
    // different samples.
    Offset2d        uvOffset;                 ///< u/v offset value selectors.  Values specified are in
                                              ///  log2 of fractions of pixel.  i.e., 1 / (1 << x).  Not all values
                                              ///  are supported by all HW.
    Offset2d        uvSlope;                  ///< u/v slope value selectors.  Supported slope values are
                                              ///  specified in degrees.  In the case of a 3D image, the supplied
                                              ///  uvSlope.y is interpreted as wSlope.
                                              ///      0    2.5
                                              ///      1    3
                                              ///      2    4
                                              ///      3    5
                                              ///      4    8
                                              ///      5    16
                                              ///      6    32
                                              ///      7    64
                                              ///      other values: unsupported

    union
    {
        struct
        {
            uint32 mgpuIqMatch              : 1;  ///< Enables image compatibility for MGPU scenarios where paired devices
                                                  ///  come from different hardware families.
            uint32 preciseAniso             : 1;  ///< Anisotropic filtering should prefer precision over speed.
            uint32 unnormalizedCoords       : 1;  ///< If set then always use unnormalized texture coordinates instead of
                                                  ///  zero to one.  Only works under certain conditions (no mip filtering,
                                                  ///  no computed LOD, no offsets, only edge or border clamp address modes)
            uint32 truncateCoords           : 1;  ///< If set then hardware will truncate mantissa instead of
                                                  ///  rounding to nearest even in float point to fixed point
                                                  ///  texture coordinate conversion.
            uint32 seamlessCubeMapFiltering : 1;  ///< If set then there's filtering across the edges of the cube map.
            uint32 prtBlendZeroMode         : 1;  ///< Allow unmapped PRT texels to be treated as zero and blended with
                                                  ///  mapped texels.  If set to 0, the destination of the sample instruction
                                                  ///  is written with all 0s when TFE == 0; if set to 1, treat unmapped
                                                  ///  texels as zeros and blend them with other mapped texels, write the
                                                  ///  result of this sample instruction to the destination GPRs.
            uint32 useAnisoThreshold        : 1;  ///< If set, Hw will use the value assigned in anisoThreshold, but
                                                  ///  only if preciseAniso is also set to 0.

            /// This allows the sampler to turn off overriding anisotropic filtering when the resource view contains a
            /// single mipmap level.  Not all graphics IP supports overriding anisotropic filtering, and this flag will
            /// be ignored for such GPUs.
            uint32 disableSingleMipAnisoOverride : 1;

            uint32 forResidencyMap          : 1;  ///< Set if the surface being sampled is a residency map used in PRTs.
                                                  ///  Only meaningful if the corresponding ImageView's mapAccess is set to
                                                  ///  "read".  Only valid for devices that report the "PrtFeaturePrtPlus"
                                                  ///  flag.
            uint32 reserved                 : 23; ///< Reserved for future use.
        };
        uint32 u32All;  ///< Value of flags bitfield.
    } flags;
};

/// Specifies which heuristic should be utilized for sorting children when box sorting is enabled.
enum class BoxSortHeuristic : uint32
{
    ClosestFirst    = 0x0,  ///< Traversal is ordered to enter the children that
                            ///< intersect the ray closer to the ray origin first.
                            ///< This is a good baseline option.  Default option for RT IP 1.x.
    LargestFirst    = 0x1,  ///< Traversal is ordered to enter the children that have the largest
                            ///< interval where the box intersects the ray first.
                            ///< Good for shadow rays with terminate on first hit.
    ClosestMidPoint = 0x2,  ///< Traversal is ordered to enter the children that have a midpoint in the interval
                            ///< where the box intersects that has the lowest intersection time before clamping.
                            ///< Good for reflection rays.
    Disabled        = 0x3,  ///< Box sort and heuristic are disabled.
    Count
};

/// Specifies parameters for creating a BVH (bounding volume hierarchy, used by ray tracing) descriptor.
struct BvhInfo
{
    const IGpuMemory* pMemory;           ///< Memory object holding the BVH nodes.
    gpusize           offset;            ///< Offset from memory address specified by pMemory.  Combination of
                                         ///  pMemory address and the offset must be 256 byte aligned.
    gpusize           numNodes;          ///< Number of nodes in the view.
    uint32            boxGrowValue;      ///< Number of ULPs (unit in last place) to be added during ray-box test.
    BoxSortHeuristic  boxSortHeuristic;  ///< Specifies which heuristic should be utilized for
                                         ///< sorting children when box sorting is enabled.

    union
    {
        struct
        {
            uint32 useZeroOffset        :  1;  ///< If set, SRD address is programmed to zero.
            uint32 returnBarycentrics   :  1;  ///< When enabled, ray intersection will return triangle barycentrics.
                                               ///  Note: Only valid if @see supportIntersectRayBarycentrics is true.

            /// Set to have this surface independently bypass the MALL for read and / or write operations.
            /// If set, this overrides the GpuMemMallPolicy specified at memory allocation time.  Meaningful
            /// only on GPUs that have supportsMall set in DeviceProperties.
            uint32 bypassMallRead       :  1;
            uint32 bypassMallWrite      :  1;
            uint32 pointerFlags         :  1;  ///< If set, flags are encoded in the node pointer bits.
            uint32 highPrecisionBoxNode :  1;  ///< If set, enable 64-byte high precision box node.
            uint32 wideSort             :  1;  ///< If set, enable wide sort.
            uint32 hwInstanceNode       :  1;  ///< If set, enable hardware instance node.
            uint32 sortTrianglesFirst   :  1;  ///< If set, triangle nodes are treated specially during child sorting.
            uint32 compressedFormatEn   :  1;  ///< If set, enable compressed format support.  This includes enabling
                                               ///  support for compressed primitive packets, BVH8-128B box nodes,
                                               ///  and changes to triangle intersection test return data.
            uint32 reserved             : 22;  ///< Reserved for future HW.
        };

        uint32 u32All;  ///< Flags packed as 32-bit uint.
    } flags;  ///< BVH creation flags.
};

/// Specifies parameters for an fmask view descriptor.
///
/// Input to CreateFmaskViewSrd().  Allows the client to access fmask from a shader using the load_fptr IL instruction.
///
/// @ingroup ResourceBinding
struct FmaskViewInfo
{
    const IImage* pImage;          ///< Image associated with the fmask view.
    uint32        baseArraySlice;  ///< First slice in the view.
    uint32        arraySize;       ///< Number of slices in the view.

    union
    {
        struct
        {
            uint32 shaderWritable :  1;  ///< True if used with an image that has been transitioned to a shader-
                                         ///  writable image state (e.g. [Graphics|Compute][WriteOnly|ReadWrite])
            uint32 reserved       : 31;  ///< Reserved for future use.
        };
        uint32 u32All;  ///< Value of flags bitfield.
    } flags;  ///< Fmask view flags.
};

/// Element of the multisample pattern representing a sample position (X, Y), type of SamplePatternPalette, which
/// matches the layout defined by SC.
struct SamplePos
{
    float  x;          ///< x coordinate of sample position.
    float  y;          ///< y coordinate of sample position.
    uint32 reserved1;  ///< Reserved for future use.
    uint32 reserved2;  ///< Reserved for future use.
};

/// Specifies a palette of MSAA sample patterns used by the client.  Input to SetSamplePatternPalette, which is used
/// to implement samplepos shader instruction support.
typedef SamplePos SamplePatternPalette[MaxSamplePatternPaletteEntries][MaxMsaaRasterizerSamples];

/// Provides a GPU timestamp along with the corresponding CPU timestamps, for use in calibrating CPU and GPU timelines.
struct CalibratedTimestamps
{
    uint64 gpuTimestamp;                   ///< GPU timestamp value compatible with ICmdBuffer::CmdWriteTimestamp().
    uint64 cpuClockMonotonicTimestamp;     ///< POSIX CLOCK_MONOTONIC timestamp.
    uint64 cpuClockMonotonicRawTimestamp;  ///< POSIX CLOCK_MONOTONIC_RAW timestamp.
    uint64 cpuQueryPerfCounterTimestamp;   ///< Windows QueryPerformanceCounter timestamp.
    uint64 maxDeviation;                   ///< Maximum deviation in nanoseconds between the GPU and CPU timestamps.
};

/// Specifies connector types.
enum class DisplayConnectorType : uint32
{
    Unknown = 0,  ///< Unknown connector type
    Vga,          ///< VGA
    DviD,         ///< DVI_D
    DviI,         ///< DVI_I
    Hdmi,         ///< HDMI
    Dp,           ///< DP
    Edp,          ///< EDP
    Minidp,       ///< MINI_DP
    Count
};

/// Specifies properties for display connectors connected to the GPU.
struct DisplayConnectorProperties
{
    DisplayConnectorType type;  ///< Connector type - VGA, DVI, HDMI, DP etc.
};

/// Specifies a pre-defined power profile which is used to communicate with KMD/PPLib and set corresponding power
/// states.
enum class PowerProfile : uint32
{
    Default   = 0,  ///< Default power profile.
    VrCustom  = 1,  ///< Power profile used by custom VR scenario.
    VrDefault = 2,  ///< Power profile used by default VR scenario.
    Idle      = 3,  ///< Power profile used for forced DPM0, in case HMD is taken off but the game is still running.
    Count
};

/// Fine-grain power switch info.
struct PowerSwitchInfo
{
    uint32 time;         ///< Time in microseconds, relative to the frame start at V-sync.  Clients should consider the
                         ///  powerSwitchLatency value reported in @ref PrivateScreenProperties when specifying
                         ///  switch times.
    uint32 performance;  ///< Performance to be set (between 0-100), which is mapped to a certain DPM level by KMD.
};

/// Maximum number of power switch info allowed in one custom power profile.
static constexpr uint32 MaxNumPowerSwitchInfo = 5;

/// Fine-grain power management for dynamic power mode.  This structure specifies multiple DPM states to be cycled
/// through each frame.
struct CustomPowerProfile
{
    IPrivateScreen* pScreen;        ///< Dynamic power mode needs V-sync so a private screen object is needed.
    uint32          numSwitchInfo;  ///< Number of discrete DPM states to cycle through per frame.  Number of entries
                                    ///  in switchInfo[] and actualSwitchInfo[].
    PowerSwitchInfo switchInfo[MaxNumPowerSwitchInfo];        ///< Specifies the set of power states to cycle through each
                                                              ///  frame.  Each entry specifies an offset into the frame where
                                                              ///  the DPM state should be switched, and a rough performance
                                                              ///  requirement value which will be translated into an
                                                              ///  appropriate DPM state by KMD.
    PowerSwitchInfo actualSwitchInfo[MaxNumPowerSwitchInfo];  ///< The actual set of power states that KMD/PPLib sets.
};

/// Flags for IDevice::AddGpuMemoryReferences().  Depending on their residency model, a client may set these flags as
/// directed by the application or hard-code them to a single value.  Driver-internal memory references should be marked
/// as CantTrim unless the client explicitly handles trim support.
///
/// Note that the CantTrim and MustSucceed flags are based on the same WDDM flags; it is expected that PAL will ignore
/// them on non-WDDM platforms.
enum GpuMemoryRefFlags : uint32
{
    GpuMemoryRefCantTrim    = 0x1,  ///< The caller can't or won't free this allocation on OS request.
    GpuMemoryRefMustSucceed = 0x2,  ///< Hint to the OS that we can't process a failure here, this may result in a TDR.
    GpuMemoryRefAllFlags    = 0x3   ///< Clients should NOT use it, for internal static_assert purpose only.
};

/// Specifies input arguments for IDevice::GetPrimaryInfo().  Client must specify a display ID and properties of the
/// primary surface that will drive that display in order to query capabilities.
struct GetPrimaryInfoInput
{
    uint32         vidPnSrcId;      ///< Video present source id.
    uint32         width;           ///< Primary surface width.
    uint32         height;          ///< Primary surface height.
    SwizzledFormat swizzledFormat;  ///< Format and swizzle of the primary surface.
    Rational       refreshRate;     ///< Video refresh rate, this is only valid if refreshRateValid is set.

    union
    {
        struct
        {
            uint32 qbStereoRequest            :  1;  ///< Going to set a stereo mode.
            uint32 refreshRateValid           :  1;  ///< Refresh rate is valid.
            uint32 freeSyncInCrossFireSupport :  1;  ///< True if client supports FreeSync in CrossFire.
            uint32 useKmdCalcFramePacing      :  1;  ///< True if client uses KMD frame pacing.  If so, the client
                                                     ///  creates a timer queue to delay the present, and the delay
                                                     ///  value is calculated by KMD.
            uint32 reserved                   : 28;  ///< Reserved for future use.
        };
        uint32 u32All;  ///< Flags packed as 32-bit uint.
    } flags;  ///< Get primary surface info input flags.
};

/// Specifies output arguments for IDevice::GetStereoDisplayModes(), returning supported stereo mode.
struct StereoDisplayModeOutput
{
    Extent2d       extent;       ///< Dimensions in pixels WxH.
    Rational       refreshRate;  ///< Refresh rate.
    SwizzledFormat format;       ///< Format and swizzle of the primary surface.
};

/// Specifies output arguments for IDevice::GetActive10BitPackedPixelMode(), returning which, if any, 10-bit
/// display mode is active.
struct Active10BitPackedPixelModeOutput
{
    bool            isInWs10BitMode;         ///< Whether the workstation 10-bit feature is enabled.
    bool            notifyKmd10bitsPresent;  ///< When in 10-bit mode and at present time, if the
                                             ///  client driver sees a 10-bit to 8-bit surface blt,
                                             ///  it needs to call RequestKmdReinterpretAs10Bit() to
                                             ///  inform the KMD that the dst surface must be reinterpreted
                                             ///  as 10-bits per channel for all KMD-initiated BLTs.
    PackedPixelType packedPixelType;         ///< Format of the packed pixels.
    uint32          pixelPackRatio;          ///< The number of 10-bit pixels that are packed into one 8-8-8-8
                                             ///  format pixel.
};

/// Specifies primary surface stereo mode.
enum StereoMode : uint32
{
    StereoModeHwAlignedViews = 0,  ///< The stereo views are HW aligned on the display.
    StereoModeSwPackedViews  = 1,  ///< The layout of the stereo views on the display is determined by the client.
    StereoModeNotSupported   = 2,  ///< Stereo mode is not supported.
    StereoModeSideBySide     = 3,  ///< The two stereo views are put side by side on the display.
    StereoModeTopBottom      = 4   ///< One stereo view is on the top of the display, and the other is on the bottom.
};

/// Enumerates the supported workstation stereo modes.
enum class WorkstationStereoMode : uint32
{
    Disabled,
    ViaConnector,             ///< Active Stereo for 3 Pin VESA connector.
    ViaBlueLine,              ///< Blue line Active Stereo for laptops.
    Passive,                  ///< Passive Stereo (Dual head).
    PassiveInvertRightHoriz,  ///< Passive Stereo with Horizontal Invert (Dual Head).
    PassiveInvertRightVert,   ///< Passive Stereo with Vertical Invert (Dual Head).
    Auto,                     ///< Auto Stereo Vertical Interleaved.
    AutoHoriz,                ///< Auto Stereo Horizontal Interleaved.
    AutoCheckerboard,         ///< Auto Stereo Checkerboard Interleaved.
    AutoTsl,                  ///< Tridelity SL Auto Stereo.
    Count,
};

/// Specifies output arguments for IDevice::GetPrimaryInfo(), returning capability information for a display in
/// a particular mode.
struct GetPrimaryInfoOutput
{
    uint32     tilingCaps;           ///< Tiling caps supported by this primary surface.
    StereoMode stereoMode;           ///< Stereo mode supported by this primary surface.
    uint32     mallCursorCacheSize;  ///< Size of the mall cursor cache in bytes.

    union
    {
        struct
        {
            /// MGPU flag: this primary surface supports DVO HW compositing mode.
            uint32 dvoHwMode                   :  1;
            /// MGPU flag: this primary surface supports XDMA HW compositing mode.
            uint32 xdmaHwMode                  :  1;
            /// MGPU flag: this primary surface supports client doing SW compositing mode.
            uint32 swMode                      :  1;
            /// MGPU flag: this primary surface supports freesync.
            uint32 isFreeSyncEnabled           :  1;
            /// Single-GPU flag: gives hint to the client that they should use rotated tiling mode.
            uint32 hwRotationPortraitMode      :  1;
            /// Single-GPU flag: this primary surface supports non local heap.
            uint32 displaySupportsNonLocalHeap :  1;
            /// Reserved for future use.
            uint32 reserved                    : 26;
        };
        uint32 u32All;  ///< Flags packed as 32-bit uint.
    } flags;  ///< Get primary surface support info output flags.
};

/// Specifies different clock modes that the device can be set to.
enum class DeviceClockMode : uint32
{
    Default        = 0,  ///< Device clocks and other power settings are restored to default.
    Query          = 1,  ///< Queries the current device clock ratios.  Leaves the clock mode of the device unchanged.
    Profiling      = 2,  ///< Scale down from peak ratio.  Clocks are set to a constant amount which is
                         ///  known to be power and thermal sustainable.  The engine/memory clock ratio
                         ///  will be kept the same as much as possible.
    MinimumMemory  = 3,  ///< Memory clock is set to the lowest available level.  Engine clock is set to
                         ///  thermal and power sustainable level.
    MinimumEngine  = 4,  ///< Engine clock is set to the lowest available level.  Memory clock is set to
                         ///  thermal and power sustainable level.
    Peak           = 5,  ///< Clocks set to maximum when possible.  Fan set to maximum.  Note: Under power
                         ///  and thermal constraints device will clock down.
    QueryProfiling = 6,  ///< Queries the profiling device clock ratios.  Leaves the clock mode of the device unchanged.
    QueryPeak      = 7,  ///< Queries the peak device clock ratios.  Leaves the clock mode of the device unchanged.
    Count
};

/// Specifies output arguments for IDevice::SetClockMode().  The caller can read the clock values the device is
/// currently running at by querying with the mode DeviceClockMode::Query.
struct SetClockModeOutput
{
    uint32 memoryClockFrequency;  ///< Current memory clock (absolute) value in MHz.
    uint32 engineClockFrequency;  ///< Current GPU core clock (absolute) value in MHz.
};

/// Specifies input arguments for IDevice::SetClockMode().  The caller must specify the mode in which to set the device.
struct SetClockModeInput
{
    DeviceClockMode clockMode;  ///< Used to specify the clock mode for the device.
};

/// Specifies primary surface MGPU compositing mode.
enum MgpuMode : uint32
{
    MgpuModeOff  = 0,  ///< MGPU compositing mode off, the client does not do SW compositing at all, e.g. AFR disabled.
    MgpuModeSw   = 1,  ///< MGPU SW compositing mode, the client handles the SW compositing.
    MgpuModeDvo  = 2,  ///< MGPU DVO HW compositing mode.
    MgpuModeXdma = 3,  ///< MGPU XDMA HW compositing mode.
    MgpuModeCount
};

/// Specifies input arguments for IDevice::SetMgpuMode().  A client sets a particular MGPU compositing mode and whether
/// frame pacing is enabled for a display.
struct SetMgpuModeInput
{
    uint32   vidPnSrcId;            ///< Video present source id.
    MgpuMode mgpuMode;              ///< Primary surface MGPU compositing mode.
    bool     isFramePacingEnabled;  ///< True if frame pacing enabled.  If so, the client creates a timer queue
                                    ///  to delay the present, and the delay value is calculated by KMD.
};

constexpr uint32 XdmaMaxDevices = 8;  ///< Maximum number of Devices for XDMA compositing.

/// Specifies XDMA cache buffer info for each GPU.
struct XdmaBufferInfo
{
    uint32 bufferSize;      ///< XDMA cache buffer size of each device.
    uint32 startAlignment;  ///< XDMA cache buffer start alignment of each device.
};

/// Specifies output arguments for IDevice::GetXdmaInfo(), returning the XDMA cache buffer information of each GPU for
/// a display.
struct GetXdmaInfoOutput
{
    XdmaBufferInfo xdmaBufferInfo[XdmaMaxDevices];  ///< Output XDMA cache buffer info, one entry per device.
};

/// Specifies flipping status flags on a specific VidPnSource.  It's Windows specific.
union FlipStatusFlags
{
    struct
    {
        uint32 immediate :  1;  ///< Is immediate flip.
        uint32 dwmFlip   :  1;  ///< Is DWM conducted flip.
        uint32 iFlip     :  1;  ///< Is independent exclusive flip.
        uint32 reserved  : 29;  ///< Reserved for future use.
    };
    uint32 u32All;  ///< Flags packed as 32-bit uint.
};

/// Specifies the VSync mode of virtual display.
enum class VirtualDisplayVSyncMode : uint32
{
    Default   = 0,  ///< Using the default VSync mode based on refresh rate.
    Immediate = 1,  ///< The presentation should be executed immediately without waiting for vsync to display.
    HMD       = 2,  ///< Using HMD VSync, the HMD is specified by pPrivateScreen.
    Count
};

/// The VirtualDisplayInfo is provided by the application and KMD uses it to create a virtual display.
/// @see IDevice::CreateVirtualDisplay.
struct VirtualDisplayInfo
{
    uint32                  width;           ///< Horizontal dimension in pixels.
    uint32                  height;          ///< Vertical dimension in pixels.
    Rational                refreshRate;     ///< Refresh rate of virtual display.
    VirtualDisplayVSyncMode vsyncMode;       ///< VSync mode.
    uint32                  vsyncOffset;     ///< VSync front porch location in pixels or lines.
                                             ///  It's needed when vsyncMode is HMD.
    Pal::IPrivateScreen*    pPrivateScreen;  ///< A pointer to IPrivateScreen.
                                             ///  It's needed when vsyncMode is HMD.
};

/// Function pointer type definition for creating a buffer view SRD.
///
/// @see IDevice::CreateTypedBufferViewSrds()/CreateUntypedBufferViewSrds().
///
/// @param [in]  pDevice         Pointer to the device this function is called on.
/// @param [in]  count           Number of buffer view SRDs to create; size of the pBufferViewInfo array.
/// @param [in]  pBufferViewInfo Array of buffer view descriptions directing SRD construction.
/// @param [out] pOut            Client-provided space where opaque, hardware-specific SRD data is written.
/// /// @ingroup ResourceBinding typedef void (PAL_STDCALL *CreateBufferViewSrdsFunc)( const IDevice* pDevice, uint32 count, const BufferViewInfo* pBufferViewInfo, void* pOut); /// Function pointer type definition for creating an image view SRD. /// /// @see IDevice::CreateImageViewSrds(). /// /// @param [in] pDevice Pointer to the device this function is called on. /// @param [in] count Number of buffer view SRDs to create; size of the pImageViewInfo array. /// @param [in] pImgViewInfo Array of image view descriptions directing SRD construction. /// @param [out] pOut Client-provided space where opaque, hardware-specific SRD data is written. /// /// @ingroup ResourceBinding typedef void (PAL_STDCALL *CreateImageViewSrdsFunc)( const IDevice* pDevice, uint32 count, const ImageViewInfo* pImgViewInfo, void* pOut); /// Function pointer type definition for creating a fmask view SRD. /// /// @see IDevice::CreateFmaskViewSrds(). /// /// @param [in] pDevice Pointer to the device this function is called on. /// @param [in] count Number of fmask view SRDs to create; size of the pFmaskViewInfo array. /// @param [in] pFmaskViewInfo Array of fmask view descriptions directing SRD construction. /// @param [out] pOut Client-provided space where opaque, hardware-specific SRD data is written. /// /// @ingroup ResourceBinding typedef void (PAL_STDCALL *CreateFmaskViewSrdsFunc)( const IDevice* pDevice, uint32 count, const FmaskViewInfo* pFmaskViewInfo, void* pOut); /// Function pointer type definition for creating a sampler SRD. /// /// @see IDevice::CreateSamplerSrds(). /// /// @param [in] pDevice Pointer to the device this function is called on. /// @param [in] count Number of sampler SRDs to create; size of the pSamplerInfo array. /// @param [in] pSamplerInfo Array of sampler descriptions directing SRD construction. /// @param [out] pOut Client-provided space where opaque, hardware-specific SRD data is written. 
/// /// @ingroup ResourceBinding typedef void (PAL_STDCALL *CreateSamplerSrdsFunc)( const IDevice* pDevice, uint32 count, const SamplerInfo* pSamplerInfo, void* pOut); /// Function pointer type definition for creating a ray tracing (BVH) SRD. /// /// @see IDevice::CreateBvhSrds(). /// /// @param [in] pDevice Pointer to the device this function is called on. /// @param [in] count Number of BVH SRDs to create; size of the pBvhInfo array. /// @param [in] pBvhInfo Array of BVH descriptions directing SRD construction. /// @param [out] pOut Client-provided space where opaque, hardware-specific SRD data is written. /// /// @ingroup ResourceBinding typedef void (PAL_STDCALL *CreateBvhSrdsFunc)( const IDevice* pDevice, uint32 count, const BvhInfo* pBvhInfo, void* pOut); /// Function pointer type definition for decoding a buffer SRD back into most of the BufferViewInfo used to create it /// via Create{Typed,Untyped}BufferViewSrds. /// /// @param [in] pDevice Pointer to the device this function is called on. /// @param [in] pBufferViewSrd Pointer to the SRD. Should not be in dedicated GPU memory. /// @param [out] pViewInfo Pointer to memory to be filled with decoded info. typedef void (PAL_STDCALL *DecodeBufferViewSrdFunc)( const IDevice* pDevice, const void* pBufferViewSrd, BufferViewInfo* pViewInfo); /// Structure filled in by DecodeImageViewSrd(). struct DecodedImageSrd { SwizzledFormat swizzledFormat; ///< SRD's swizzled format. SubresRange subresRange; ///< SRD's subresource range; the array-range is always { 0, 1 } for 3D images. Range zRange; ///< z-range of the SRD's subresRange.startSubres. }; /// Function pointer type definition for decoding an image SRD back into the main parameters used to create it via /// CreateImageViewSrds(). /// /// @param [in] pDevice Pointer to the device this function is called on. /// @param [in] pImage The same image the SRD was created on. /// @param [in] pImageViewSrd Pointer to the SRD. Should not be in dedicated GPU memory. /// @param [out] pDecodedInfo Pointer to memory to be filled with decoded info.
typedef void (PAL_STDCALL *DecodeImageViewSrdFunc)( const IDevice* pDevice, const IImage* pImage, const void* pImageViewSrd, DecodedImageSrd* pDecodedInfo); /// Function pointer table for SRD methods. struct DeviceInterfacePfnTable { CreateBufferViewSrdsFunc pfnCreateTypedBufViewSrds; ///< Typed Buffer view SRD creation function pointer. CreateBufferViewSrdsFunc pfnCreateUntypedBufViewSrds; ///< Untyped Buffer view SRD creation function pointer. CreateImageViewSrdsFunc pfnCreateImageViewSrds; ///< Image view SRD creation function pointer. CreateFmaskViewSrdsFunc pfnCreateFmaskViewSrds; ///< Fmask View SRD creation function pointer. CreateSamplerSrdsFunc pfnCreateSamplerSrds; ///< Sampler SRD creation function pointer. CreateBvhSrdsFunc pfnCreateBvhSrds; ///< BVH SRD creation function pointer. DecodeBufferViewSrdFunc pfnDecodeBufferViewSrd; ///< Buffer SRD decode function pointer. DecodeImageViewSrdFunc pfnDecodeImageViewSrd; ///< Image SRD decode function pointer. }; /// Specifies output arguments for IDevice::QueryWorkstationCaps(), returning workstation feature information /// for this device's workstation board. union WorkStationCaps { struct { uint32 workStationBoard : 1; ///< Running a workstation driver on a workstation board. /// On workstation boards that support CWG (Creator Who Game), /// the user can switch to a Gaming/consumer driver on the /// workstation board, and then this will be false. uint32 supportWorkstationAppPerfOpt : 1; ///< Workstation boards have optimizations for various kinds of workstation /// applications. These optimizations are enabled if this bit is set. uint32 supportWorkstationEdgeFlag : 1; ///< Workstation boards have a DX9 feature where the edge flag can be /// exported via point size output in VS. The feature is enabled /// if this bit is set. uint32 reserved : 29; ///< Reserved for future use. }; uint32 u32All; ///< Flags packed as 32-bit uint.
}; /// FrameLock/GenLock support state enum enum class FlglSupport : uint32 { NotAvailable = 0, ///< FL/GL not supported by the GPU NotConnected = 1, ///< FL/GL support available in the GPU, but is not connected to a GLSync board Available = 2, ///< FL/GL support available and connected Count }; /// Container structure for FrameLock/GenLock state. struct FlglState { union { struct { uint32 genLockEnabled : 1; ///< True if genlock is currently enabled. Genlock is a system-wide setting ///< in CCC. Genlock provides a signal source (which is used in framelock) uint32 frameLockEnabled : 1; ///< True if (KMD) framelock is currently enabled. ///< Framelock is the mechanism to sync all presents in multiple adapters. uint32 isTimingMaster : 1; ///< True if the display being driven by the current adapter is the timing ///< master in a genlock configuration uint32 reserved : 29; ///< Reserved for future use. }; uint32 u32All; ///< Packed 32-bit uint value. }; FlglSupport support; ///< The state of the FLGL support in the current adapter uint32 firmwareVersion; ///< Firmware version number of the GLSync hardware (S400 board), if available }; /// GlSync setting mask definition, used with GlSyncConfig::validMask enum GlSyncConfigMask : uint32 { GlSyncConfigMaskSignalSource = 0x00000001, GlSyncConfigMaskSyncField = 0x00000002, GlSyncConfigMaskSampleRate = 0x00000004, GlSyncConfigMaskSyncDelay = 0x00000008, GlSyncConfigMaskTriggerEdge = 0x00000010, GlSyncConfigMaskScanRateCoeff = 0x00000020, GlSyncConfigMaskFrameLockCntl = 0x00000040, GlSyncConfigMaskSigGenFrequency = 0x00000080 }; /// Specifies GLSYNC framelock control state enum GlSyncFrameLockCtrl : uint32 { GlSyncFrameLockCntlNone = 0x00000000, GlSyncFrameLockCntlEnable = 0x00000001, GlSyncFrameLockCntlDisable = 0x00000002, GlSyncFrameLockCntlResetSwapCounter = 0x00000004, GlSyncFrameLockCntlAckSwapCounter = 0x00000008, GlSyncFrameLockCntlVersionKmd = 0x00000010 }; /// Specifies GlSync Signal Source enum GlSyncSignalSource : uint32 {
GlSyncSignalSourceGpuMask = 0x0FF, GlSyncSignalSourceUndefined = 0x100, GlSyncSignalSourceFreerun = 0x101, GlSyncSignalSourceBncPort = 0x102, GlSyncSignalSourceRj45Port1 = 0x103, GlSyncSignalSourceRj45Port2 = 0x104 }; /// Specifies GlSync Sync Field enum GlSyncSyncField : uint8 { GlSyncSyncFieldUndefined = 0, GlSyncSyncFieldBoth = 1, GlSyncSyncField1 = 2 }; /// Specifies GlSync Sync Trigger Edge enum GlSyncTriggerEdge : uint8 { GlSyncTriggerEdgeUndefined = 0, GlSyncTriggerEdgeRising = 1, GlSyncTriggerEdgeFalling = 2, GlSyncTriggerEdgeBoth = 3 }; /// Specifies GlSync scan rate coefficient/multiplier options enum GlSyncScanRateCoeff : uint8 { GlSyncScanRateCoeffUndefined = 0, GlSyncScanRateCoeffx5 = 1, GlSyncScanRateCoeffx4 = 2, GlSyncScanRateCoeffx3 = 3, GlSyncScanRateCoeffx5Div2 = 4, GlSyncScanRateCoeffx2 = 5, GlSyncScanRateCoeffx3Div2 = 6, GlSyncScanRateCoeffx5Div4 = 7 }; /// Container structure for FrameLock/GenLock config. struct GlSyncConfig { uint32 validMask; ///< Mask that specifies which settings are actually referenced in the structure. /// GlSyncConfigMask* uint32 syncDelay; ///< Delay of sync signal in microseconds uint32 framelockCntlVector; ///< Vector of Framelock control bits. GlSyncFrameLockCntl* uint32 signalSource; ///< Source of sync signal. Can be House Sync, RJ45 Port or GPUPort. /// GlSyncSignalSource* or GPUPort Index uint8 sampleRate; ///< Number of VSyncs per sample. 0 - no sampling, synchronized by singal VSync. uint8 syncField; ///< Sync to Field 1 or to both Fields when input signal is interlaced. /// GlSyncSyncField* uint8 triggerEdge; ///< Which edge should be used as trigger. GlSyncTriggerEdge* uint8 scanRateCoeff; ///< Scan Rate Multiplier applied to original sync signal. GlSyncScanRateCoeff* uint32 sigGenFrequency; ///< Frequency in mHz of internal signal generator }; #if PAL_KMT_BUILD /// The type of NT object, it decides which KMT function should be called to retrieve the NT handle.
enum class ExternalObjectType : uint32 { Allocation = 0, ///< Specifies the NT object is an allocation, such as GPUMemory etc. SyncObject = 1, ///< Specifies the NT object is a sync object, such as semaphore and fence etc. }; /// Specifies the NT handle attributes when opening from name. struct ExternalHandleInfo { ExternalObjectType objectType; ///< The type of NT object, it decides which KMT function should /// be called to retrieve the NT handle. const _SECURITY_ATTRIBUTES* pSecurityAttributes; ///< It specifies the security descriptor and the inheritable /// attribute. const wchar_t* pNtObjectName; ///< A name for the NT handle. If the object is exported as an NT /// handle with a name, the handle can be acquired via this /// name. uint32 accessFlags; ///< Desired access rights of the object the handle refers to. }; #endif #if (PAL_KMT_BUILD || PAL_AMDGPU_BUILD) /// Gpu heap group enumeration. One heap group contains several pal GpuHeap. enum GpuHeapGroup : uint32 { GpuHeapGroupLocal = 0x0, ///< Local heap group includes GpuHeapLocal and GpuHeapInvisible on Windows. /// But GpuHeapInvisible is not included on Linux. GpuHeapGroupNonLocal = 0x1, ///< NonLocal heap group includes GpuHeapGartUswc and GpuHeapGartCacheable. #if PAL_AMDGPU_BUILD GpuHeapGroupInvisible = 0x2, ///< This is used on Linux as GpuHeapLocal and GpuHeapInvisible are not combined. #endif GpuHeapGroupCount, }; /// Struct for querying current gpu memory usage info and budget info. struct GpuMemoryBudgetInfo { #if PAL_AMDGPU_BUILD gpusize systemUsage[GpuHeapGroupCount]; ///< Current total memory usage of specified heap group of whole system. #else gpusize usage[GpuHeapGroupCount]; ///< Current total memory usage of specified heap group of current process.
gpusize budget[GpuHeapGroupCount]; ///< Current total memory budget of specified heap group of the device which /// implies how much memory the device can allocate from that heap group /// before allocations may fail or cause performance degradation, including /// all allocated memory. Budget might be affected by OS status and other /// processes. #endif }; #endif /// Reclaim allocation result enumeration. enum class ReclaimResult : uint8 { Ok = 0, ///< Reclaim result is OK. Discarded = 1, ///< Reclaim result is discarded. NotCommitted = 2, ///< Reclaim result is not committed. Count }; #if defined(_WIN32) /// KMD has an interface in place to support UMD registration of Events to be signaled based on various /// system-level conditions. Each enum here represents a certain condition which will cause KMD to signal /// the event Clients previously registered through RegisterEvent. enum class EventTrackingType : uint32 { ShaderInterrupt = 0, EarlyPresent = 1, Count }; /// Clients specify the Util::Event and EventTrackingType they want to register with PAL in this struct. /// Under the condition specified by the trackingType, pEvent will be signaled by KMD. struct RegisterEventInfo { Util::Event* pEvent; EventTrackingType trackingType; }; /// Input of UnregisterEvent, specifying the event and trackingType Clients want to unregister from KMD. struct UnregisterEventInfo { Util::Event* pEvent; EventTrackingType trackingType; }; /// Output parameter of RegisterEvent. There's one struct for each type of EventTrackingType. union RegisterEventOutputInfo { struct { uint32 eventId; gpusize eventMailboxGpuVa; } shaderInterrupt; struct { uint32 eventId; } earlyPresent; }; #endif /// Contains the page fault status of the GPU. struct PageFaultStatus { union { struct { uint32 pageFault : 1; ///< Set if there was a GPU page fault. uint32 readFault : 1; ///< Set if the page fault was during a read operation. uint32 reserved : 30; ///< Reserved for future use.
}; uint32 u32All; } flags; gpusize faultAddress; ///< GPU virtual address where page fault occurred. Ignored if flags.pageFault is not set. }; /// Input to the RegisterRuntimeState call, which allows the HIP runtime to pass information to KMD which can be shared /// with the HIP debugger. struct HipRuntimeSetup { const void* pRdebug; ///< Address of the r_debug structure in the runtime uint32 runtimeState; ///< Runtime-specific enum indicating runtime state uint32 ttmpSetupHint; ///< Hint indicating that ttmp values should be initialized }; /** *********************************************************************************************************************** * @interface IDevice * @brief Interface representing a client-configurable context for a particular GPU. * * This object becomes the root of all client/PAL interaction to get work done on that GPU. The main functionality * provided by the device object: * * + Creation of all other PAL objects. * + GPU memory management. *********************************************************************************************************************** */ class IDevice { public: /// Gets the maximum alignments for images created with a @ref ImageTiling::Linear tiling mode assuming the images' /// elements are no larger than pAlignments->maxElementSize. /// /// @param [in,out] pAlignments Its maxElementSize will be used to fill the rest of its members with valid /// alignments. /// /// @returns Success if pAlignments was filled with data. Otherwise, one of the following errors may be returned: /// + ErrorInvalidPointer if pAlignments is null. /// + ErrorInvalidValue if pAlignments->maxElementSize is zero. virtual Result GetLinearImageAlignments( LinearImageAlignments* pAlignments) const = 0; /// Fills out a structure with details on the properties of this device. This includes capability flags, /// supported engines/queues, performance characteristics, etc.
This should only be called after a client has /// called @ref CommitSettingsAndInit(). /// /// @see DeviceProperties /// /// @param [out] pInfo Properties structure to be filled out by PAL based on properties of this device. /// /// @returns Success if the device properties were successfully returned in pInfo. Otherwise, one of the /// following errors may be returned: /// + ErrorInvalidPointer if pInfo is null. virtual Result GetProperties( DeviceProperties* pInfo) const = 0; /// Checks and returns execution state of the device. Currently unsupported for DX clients and /// will return Unavailable if called by those clients. /// /// @param [out] pPageFaultStatus This struct is filled out when this function returns ErrorGpuPageFaultDetected. /// /// @returns Success if device is operational and running. Otherwise, one of the following errors may be returned: /// + ErrorDeviceLost if device is lost, reset or not responding, /// + ErrorInvalidValue if failed to get device reset state, /// + ErrorOutOfGpuMemory if ran out of GPU memory, /// + ErrorGpuPageFaultDetected if page fault was detected, /// + ErrorUnknown if device is in unknown state. virtual Result CheckExecutionState( PageFaultStatus* pPageFaultStatus) = 0; /// Returns this device's client-visible settings structure initialized with appropriate defaults. Clients can /// modify parameters in this structure as they wish in order to modify PAL's behavior for this device. After /// modifying settings, the client must call CommitSettingsAndInit() before finalizing the device. /// /// @warning The returned value points to an internal PAL structure. Modifying data using this pointer after /// calling CommitSettingsAndInit() will result in undefined behavior. /// /// @returns Pointer to this device's public settings for examination and/or modification by the client. virtual PalPublicSettings* GetPublicSettings() = 0; /// Reads a specific setting from the operating system specific source (e.g. registry or config file).
/// /// @param [in] pSettingName Name of the setting. Must be null-terminated. /// @param [in] settingScope The scope of settings accessible. /// @param [in] valueType The type of the setting to return (e.g. bool or int). /// @param [out] pValue Buffer to write data that was read. Must be non-null. /// @param [in] bufferSz Size of string buffer (pValue). Only necessary for ValueType::Str. /// @param [out] pInRegistry If not null, this will be set to true if the setting was read from the registry. /// /// @returns True if the read of specified setting is successful. False indicates failure. virtual bool ReadSetting( const char* pSettingName, SettingScope settingScope, Util::ValueType valueType, void* pValue, size_t bufferSz = 0, bool* pInRegistry = nullptr) const = 0; #if defined(_WIN32) /// Clients call this function to register a Util::Event() object to be signaled based on specified conditions. /// The supported conditions are enumerated by EventTrackingType. /// /// @param [in] input RegisterEventInfo which contains the Util::Event the client wants to be signaled and /// the EventTrackingType. /// @param [out] pOutput Depending on the trackingType in input, pOutput may refer to different info /// which clients may need. Refer to the definition of RegisterEventOutputInfo /// for details. /// @returns Success if no errors occurred. virtual Result RegisterEvent( const RegisterEventInfo& input, RegisterEventOutputInfo* pOutput) = 0; /// Clients call this function to unregister an event previously registered with RegisterEvent(). /// /// @param [in] input UnregisterEventInfo which contains the Util::Event and tracking type the client /// wants to unregister. /// @returns Success if no errors occurred. virtual Result UnregisterEvent( const UnregisterEventInfo& input) = 0; #endif /// Indicates that the client has finished overriding public settings so the settings struct can be finalized and /// any late-stage initialization can be done.
This method must be called before @ref IDevice::Finalize() can be /// called. /// /// @note The only functions in IDevice that may be called before CommitSettingsAndInit(): /// + GetLinearImageAlignments() /// + GetPublicSettings() /// + ReadSetting() /// /// @note Finalizing the settings may override values set by the client. This can occur in the following cases: /// + Invalid settings, either because they are not supported by hardware or are somehow self-conflicting, /// will be overridden. /// + Settings specified in the private settings will override client-specified settings. /// /// @returns Success if settings have been committed successfully and any late-stage initialization is completed /// successfully as well. virtual Result CommitSettingsAndInit() = 0; /// Returns the largest possible GPU memory alignment requirement for any IGpuMemoryBindable object created on this /// device. /// /// This is useful for clients that may want to allocate generic GPU memory rafts up front to support many objects /// without creating all of those objects to query their alignment requirements ahead of time. In practice, most /// objects have a much smaller alignment requirement than the allocation granularity, but images may require more /// than the allocation granularity on some devices. /// /// @returns Largest possible GPU memory byte alignment for an IGpuMemoryBindable object on this device. virtual gpusize GetMaxGpuMemoryAlignment() const = 0; /// Indicates that the client is able to finalize the initialization of this device with the requisite information. /// This method must be called before any of the factory creation methods may be called. /// /// @note The only functions in IDevice that may be called before Finalize(): /// + The functions listed in IDevice::CommitSettingsAndInit(). /// + GetMaxGpuMemoryAlignment() /// + GetProperties() /// /// @param [in] finalizeInfo Device finalization properties. /// /// @returns Success if final initialization is successful.
virtual Result Finalize( const DeviceFinalizeInfo& finalizeInfo) = 0; /// Cleans up all internal state, undoing any work done by CommitSettingsAndInit() and Finalize(). Following a call /// to this function, the device will be in its initial state as if it was re-enumerated; the client may requery /// settings and build up the device for further use. If the client doesn't call this function, it will be called /// automatically when IPlatform::Destroy() is called or when devices are re-enumerated. /// /// This function provides clients with a way to return devices to a trivial state, one in which they have no /// lingering OS or kernel driver dependencies. If a client pairs external state (e.g., an OS handle) with their /// devices they may be required to call this function when they destroy their API device objects. /// /// It is expected that all PAL objects created by the device have already been destroyed (e.g. GPU memory, queues); /// if not, the device may fall into an illegal state and the client will experience undefined behavior. /// /// @returns Success if no errors occurred. virtual Result Cleanup() = 0; /// Returns whether dual-source blending can be enabled. It checks the ColorBlendStateCreateInfo for any src1 blending /// options. Then it checks if we are going to override those src1 options because the blend func is /// min or max. /// /// @param [in] createInfo The ColorBlendStateCreateInfo that is checked for conditions that call for dual-source /// blending. /// /// @returns true if the blend state calls for dual-source blending to be enabled. virtual bool CanEnableDualSourceBlend( const ColorBlendStateCreateInfo& createInfo) const = 0; /// Specifies how many frames can be placed in the presentation queue. This limits how many frames the CPU can get /// in front of the device. /// /// @param [in] maxFrames Maximum number of frames that can be batched. Specifying a value of 0 resets the limit to /// a default system value (3 frames on Windows).
/// /// @returns Success if the limit was successfully adjusted. Otherwise, one of the following errors may be /// returned: /// + ErrorUnavailable if this function is not available on this OS. virtual Result SetMaxQueuedFrames( uint32 maxFrames) = 0; /// Compares this device against another device object to determine how compatible they are for multi-GPU /// operations. /// /// @param [in] otherDevice Device to determine MGPU compatibility with. /// @param [out] pInfo Result compatibility info. /// /// @returns Success if the compatibility info was successfully returned in pInfo. Otherwise, one of the following /// errors may be returned: /// + ErrorInvalidPointer if pInfo is null. virtual Result GetMultiGpuCompatibility( const IDevice& otherDevice, GpuCompatibilityInfo* pInfo) const = 0; /// Reports properties of all GPU memory heaps available to this device (e.g., size, whether it is CPU visible or /// not, performance characteristics, etc.). /// /// @param [out] info Properties of each GPU heap available to this device, indexed by the GPU ID defined in /// @ref GpuHeap. If a particular heap is unavailable, its entry will report a size of 0. /// /// @returns Success if the heap properties were successfully queried and returned in info[]. Otherwise, one of the /// following errors may be returned: /// + ErrorUnknown if an unexpected internal error occurred. virtual Result GetGpuMemoryHeapProperties( GpuMemoryHeapProperties info[GpuHeapCount]) const = 0; /// Reports all format and tiling mode related properties for this device. /// /// @param [out] pInfo Output properties. /// /// @returns Success if the properties were successfully queried and returned in pInfo. Otherwise, one of the /// following errors may be returned: /// + ErrorInvalidPointer if pInfo is null. virtual Result GetFormatProperties( MergedFormatPropertiesTable* pInfo) const = 0; /// Reports performance experiment related properties for this device.
/// /// Enumerates the GPU family, blocks, capabilities, etc. /// /// @param [out] pProperties Output properties. /// /// @returns Success if the properties were successfully queried and returned in pProperties. Otherwise, one of the /// following errors may be returned: /// + ErrorInvalidPointer if pProperties is null. virtual Result GetPerfExperimentProperties( PerfExperimentProperties* pProperties) const = 0; /// Fills out the default MSAA quad sample pattern for the given sample count. /// /// @param [in] samples The number of valid samples in the sample pattern. Must be a power of two. /// @param [out] pQuadSamplePattern Fill this with the default pattern. /// /// @returns Success if @ref pQuadSamplePattern was filled with the default sample pattern. /// Otherwise, one of the following errors may be returned: /// + ErrorInvalidPointer if @ref pQuadSamplePattern is null. /// + ErrorInvalidValue if @ref samples is not a supported power of two. /// + ErrorUnavailable if this device lacks GfxIp support. virtual Result GetDefaultSamplePattern( uint32 samples, MsaaQuadSamplePattern* pQuadSamplePattern) const = 0; /// Adds a list of per-device memory object references that persist across command buffer submissions. It is the /// responsibility of the client to make sure that all required memory references have been added before submitting /// the command buffer that uses them. References can be added at the device, queue or specified at submit time. /// gpuMemRefCount and pGpuMemoryRefs cannot be 0/null. PAL will assert and crash if these values are invalid. If /// multiple references are provided for the same memory, PAL will retain the safest set of GpuMemoryRef flags. /// /// @see IQueue::Submit() /// /// @param [in] gpuMemRefCount Number of memory references in the memory reference list, must be non-zero. /// @param [in] pGpuMemoryRefs Array of gpuMemRefCount GPU memory references.
/// @param [in] pQueue Optional IQueue that the memory references will be used on, used to optimize /// residency operations, can be null. Note, if a queue is specified here the same queue /// should be specified in RemoveGpuMemoryReferences. /// @param [in] flags Flags from GpuMemoryRefFlags that will apply to all memory object references. /// /// @returns Success if the memory references were successfully added. Can also return NotReady if the client /// passes in a valid pPagingFence pointer and the operation doesn't complete before the function returns. /// NOTE(review): this signature has no pPagingFence parameter - confirm whether the NotReady case still /// applies. /// Otherwise, one of the following errors may be returned: /// + ErrorTooManyMemoryReferences if gpuMemRefCount will cause the total reference count to exceed the /// limit of this device. /// + ErrorOutOfMemory if GPU memory objects will not fit in available GPU memory space (i.e. GPU Memory /// is overcommitted). virtual Result AddGpuMemoryReferences( uint32 gpuMemRefCount, const GpuMemoryRef* pGpuMemoryRefs, IQueue* pQueue, uint32 flags ) = 0; /// Removes a list of per-device memory object references that have previously been added via /// IDevice::AddGpuMemoryReferences(). PAL is responsible for ensuring that timestamps have been retired prior to /// actually performing any residency operations related to removal of a memory reference, so clients are free to /// call this function without regard for command buffer use. Memory references are reference counted, so an /// individual memory reference will only be removed when the total internal reference count reaches zero. /// gpuMemoryCount and ppGpuMemory cannot be 0/null; PAL will assert and crash if these values are invalid. /// /// @param [in] gpuMemoryCount Number of memory objects in the memory reference list (size of ppGpuMemory array). /// This count must be greater than zero. /// @param [in] ppGpuMemory Array of gpuMemoryCount GPU memory object pointers.
/// @param [in] pQueue Optional IQueue that the memory references were used on, used to optimize residency /// operations, can be null. Note, if a queue was specified in AddGpuMemoryReferences, /// pQueue must match. /// /// @returns Success if the memory references were successfully updated. virtual Result RemoveGpuMemoryReferences( uint32 gpuMemoryCount, IGpuMemory*const* ppGpuMemory, IQueue* pQueue ) = 0; /// Queries the Device for the total amount of referenced GPU memory for each heap type. These totals include all /// memory added to the Device or any Queue using @ref AddGpuMemoryReferences and not yet removed using @ref /// RemoveGpuMemoryReferences. Internal PAL allocations are included in these totals, but memory referenced using /// the per-submit list in @ref IQueue::Submit is not included in these amounts. /// /// The intended use for this interface is for clients to be able to manage budgeting of resident GPU memory. /// /// @param [out] referencedGpuMemTotal Array containing the total amount of referenced GPU memory for each GPU /// memory heap. virtual void GetReferencedMemoryTotals( gpusize referencedGpuMemTotal[GpuHeapCount]) const = 0; /// Gets primary surface MGPU support information based upon primary surface create info and input flags provided /// by the client. /// /// This function should not be called by clients that rely on PAL for compositor management. Basically, if your /// client uses the IScreen's interface to take full screen exclusive mode, then don't call this. /// /// @param [in] primaryInfoInput Primary surface info input arguments. /// @param [in,out] pPrimaryInfoOutput Primary surface info output arguments. /// /// @returns Success if the primary surface MGPU support information was successfully queried. virtual Result GetPrimaryInfo( const GetPrimaryInfoInput& primaryInfoInput, GetPrimaryInfoOutput* pPrimaryInfoOutput) const = 0; /// Returns the supported stereo modes list.
/// /// @param [in,out] pStereoModeCount Input value specifies the maximum number of stereo modes to enumerate, and the /// output value specifies the total number of stereo modes that were enumerated /// in pStereoModeList. The input value is ignored if pStereoModeList is null. /// This pointer must not be null. /// @param [out] pStereoModeList Output list of stereo modes. Can be null, in which case the total number of /// available modes will be written to pStereoModeCount. /// /// @returns Success if the display modes were successfully queried and the results were reported in /// pStereoModeCount/pStereoModeList. Otherwise, one of the following errors may be returned: /// + Unsupported if stereo mode is not supported, or the stereo modes can't be queried. /// + ErrorOutOfMemory if temp memory allocation failed. virtual Result GetStereoDisplayModes( uint32* pStereoModeCount, StereoDisplayModeOutput* pStereoModeList) const = 0; /// Returns the currently selected Workstation stereo mode on Windows OS. /// /// @param [out] pWsStereoMode Output currently selected Workstation Stereo mode. /// /// @returns Success if the currently selected Workstation stereo mode was successfully queried /// and the result was reported in pWsStereoMode. virtual Result GetWsStereoMode(WorkstationStereoMode* pWsStereoMode) const = 0; /// Returns information about active workstation support for 10-bit (potentially packed pixel) displays. /// /// @param [out] pMode Output reports if the workstation 10-bit display feature is enabled, and if so, /// details on any required pixel packing. /// /// @returns Success if the 10-bit and packed-pixel format were successfully queried and the result was /// reported in pMode. virtual Result GetActive10BitPackedPixelMode( Active10BitPackedPixelModeOutput* pMode) const = 0; /// Informs the KMD that this allocation must be reinterpreted as 10-bits per channel for all /// KMD-initiated BLTs.
/// /// When in 10-bit mode and at present time, if the dx9p driver sees a 10-bit to 8-bit surface blt, /// it will use this interface to inform the KMD that the blt dst surface must be reinterpreted as /// 10-bits per channel for all KMD-initiated BLTs. /// /// @param [in] pGpuMemory The dst GPU memory reference which will be marked as 10 bits format. /// /// @returns Success if the KMD has been successfully notified. virtual Result RequestKmdReinterpretAs10Bit( const IGpuMemory* pGpuMemory) const = 0; /// Set or query device clock mode. /// /// This function can be called by clients to set the device engine and memory clocks to certain pre-defined ratios. /// If a call to restore the device clocks to default does not occur, the device stays in the previously set mode. /// /// @param [in] setClockModeInput Specify the clock mode to set the device to. /// @param [out] pSetClockModeOutput @b Optional - Output device clock mode. If not nullptr, it is used /// to query the current clock mode the device is running in. /// /// @returns Success if the device clock mode query/set request was successful. virtual Result SetClockMode( const SetClockModeInput& setClockModeInput, SetClockModeOutput* pSetClockModeOutput) = 0; /// Request to enable/disable static VMID for the device. /// /// The function must be called with enable = true before a profiling session starts and enable = false after a /// profiling session ends. It may be called any time, though it is illegal to disable without a prior corresponding /// enable/acquire. Only after this returns success (when enabling) can the driver make submissions targeting the /// static VMID. /// /// @param [in] enable Specifies whether acquiring or releasing the static VMID /// /// @returns Success if the static VMID acquire/release request was successful. virtual Result SetStaticVmidMode( bool enable) = 0; /// Set up MGPU compositing mode of a display provided by client.
/// /// This function should not be called by clients that rely on PAL for compositor management. Basically, if your /// client uses the IScreen's interface to take full screen exclusive mode, then don't call this. /// /// @param [in] setMgpuModeInput Set MGPU compositing mode input arguments. /// /// @returns Success if the MGPU compositing mode was successfully set. virtual Result SetMgpuMode( const SetMgpuModeInput& setMgpuModeInput) const = 0; /// Get XDMA cache buffer information of each GPU based upon video present source ID provided by client. /// /// This function should not be called by clients that rely on PAL for compositor management. Basically, if your /// client uses the IScreen's interface to take full screen exclusive mode, then don't call this. /// /// @param [in] vidPnSrcId Video present source id. /// @param [in] gpuMemory Primary surface GPU memory. /// @param [in,out] pGetXdmaInfoOutput Set XDMA cache buffer info output arguments. /// /// @returns Success if the XDMA cache buffer information was successfully queried. virtual Result GetXdmaInfo( uint32 vidPnSrcId, const IGpuMemory& gpuMemory, GetXdmaInfoOutput* pGetXdmaInfoOutput) const = 0; /// Polls current fullscreen frame metadata controls on given vidPnSourceId, including extended data. /// /// The function is used by clients that support frame metadata through KMD-UMD shared memory. /// It polls the frame metadata shared memory for the given VidPnSource, indicating which types of metadata /// the UMD should send to KMD. /// Clients should only call this function on the master device in an LDA chain. /// /// @param [in] vidPnSrcId Video present source id. /// @param [out] pFrameMetadataControl @b Optional - Output frame metadata controls. Clients can pass null to /// check if the buffer is initialized successfully and if the /// vidPnSrcId is valid. /// /// @returns Success if the metadata controls on the given vidPnSrcId were successfully polled.
/// Otherwise, one of the following errors may be returned: /// + ErrorInvalidValue if vidPnSrcId is invalid (out of range). /// + ErrorUnavailable if there is no implementation on the current platform or if the metadata shared buffer is null. virtual Result PollFullScreenFrameMetadataControl( uint32 vidPnSrcId, PerSourceFrameMetadataControl* pFrameMetadataControl) const = 0; /// Get flip status flags and a flag indicating if current device owns the flags. (DX only) /// /// The function is used by clients that need flip status polling through KMD-UMD shared memory. /// It provides the caller with the FlipStatusFlags on the given VidPnSource. Note that the flag returned is only a hint, /// and can have a one frame delay during flip status transition. /// /// @param [in] vidPnSrcId Video present source id. /// @param [out] pFlipFlags Output flip flags on given vidPnSrcId. Must not be null. /// @param [out] pIsFlipOwner Output indicating if the current device owns the flip flags. /// /// @returns Success if the flip flags on the given vidPnSrcId were successfully polled. /// Otherwise, one of the following errors may be returned: /// + ErrorInvalidValue if vidPnSrcId is invalid. /// + ErrorUnavailable if there is no implementation on the current platform. /// + ErrorInitializationFailed if the flip status shared buffer failed to initialize. virtual Result GetFlipStatus( uint32 vidPnSrcId, FlipStatusFlags* pFlipFlags, bool* pIsFlipOwner) const = 0; /// Resets the specified set of fences. /// /// All fences must be reset before passing them to a submission command. /// /// @param [in] fenceCount Number of fences to reset. /// @param [in] ppFences Array of fences to reset. /// /// @returns Success if the specified fences have been successfully reset. /// /// @note The function assumes that ppFences is not null and that none of the elements of the array pointed to by /// ppFences are null.
virtual Result ResetFences( uint32 fenceCount, IFence*const* ppFences) const = 0; /// Stalls the current thread until one or all of the specified fences have been reached by the device. /// /// If waitAll is true all fences must have been submitted at least once before this is called; /// otherwise at least one fence must have been submitted. Using a zero timeout value returns /// immediately and can be used to determine the status of a set of fences without stalling. /// /// @param [in] fenceCount Number of fences to wait for (i.e., size of the ppFences array). /// @param [in] ppFences Array of fences to be waited on. /// @param [in] waitAll If true, wait for completion of all fences in the array before returning; if false, /// return after any single fence in the array has completed. /// @param [in] timeout This method will return after this many nanoseconds even if the fences do not complete. /// /// @returns Success if the specified fences have been reached, or Timeout if the fences have not been reached but /// the specified timeout time has elapsed. Otherwise, one of the following errors may be returned: /// + ErrorInvalidPointer if: /// - ppFences is null. /// - Any member of the ppFences array is null. /// + ErrorInvalidValue if: /// - fenceCount is zero. /// + ErrorFenceNeverSubmitted if: /// - Any of the specified fences haven't been submitted. virtual Result WaitForFences( uint32 fenceCount, const IFence*const* ppFences, bool waitAll, std::chrono::nanoseconds timeout) const = 0; /// Stalls the current thread until one or all of the specified semaphores have been reached by the device. /// /// Using a zero timeout value returns immediately and can be used to determine the status of a set of semaphores /// without stalling. /// /// @param [in] semaphoreCount Number of semaphores to wait for (i.e., size of the ppSemaphores array). /// @param [in] ppSemaphores Array of semaphores to be waited on.
/// @param [in] pValues Array of semaphore values to be waited on. /// @param [in] flags Combination of zero or more @ref HostWaitFlags values describing the behavior of this /// wait operation. See @ref HostWaitFlags for more details. /// @param [in] timeout This method will return after this many nanoseconds even if the semaphores do not /// complete. /// /// @returns Success if the specified semaphores have been reached, or Timeout if the semaphores have not been /// reached but the specified timeout time has elapsed. Otherwise, one of the following errors may be /// returned: /// + ErrorInvalidPointer if: /// - ppSemaphores is null. /// - Any member of the ppSemaphores array is null. /// + ErrorInvalidValue if: /// - semaphoreCount is zero. virtual Result WaitForSemaphores( uint32 semaphoreCount, const IQueueSemaphore*const* ppSemaphores, const uint64* pValues, uint32 flags, std::chrono::nanoseconds timeout) const = 0; /// Correlates a GPU timestamp with the corresponding CPU timestamps, for tighter CPU/GPU timeline synchronization. /// /// @param [out] pCalibratedTimestamps Reports a current GPU timestamp along with the CPU timestamps at the time /// that GPU timestamp was written. The CPU timestamps are OS-specific. Also /// reports a maximum deviation between the captured timestamps in nanoseconds. /// /// @returns Success if the request was successful. Otherwise, one of the following errors may be returned: /// + ErrorInvalidPointer if: /// - pCalibratedTimestamps is null. /// + ErrorUnavailable if: /// - unable to capture timestamps for all requested time domains. virtual Result GetCalibratedTimestamps( CalibratedTimestamps* pCalibratedTimestamps) const = 0; /// Binds the specified GPU memory as a trap handler for the specified pipeline type. This GPU memory must hold /// shader machine code (i.e., the client must generate HW-specific shader binaries through some external means, /// probably the SP3 assembler).
/// /// The same trap handler will be installed for all shader stages that are part of the pipeline. A trap handler /// will only ever be executed for shaders that set the trapPresent bit in @ref PipelineShaderInfo. /// /// @param [in] pipelineType Select compute or graphics pipeline. If graphics, this trap handler will be installed /// for _all_ hardware shader stages. /// @param [in] pGpuMemory GPU memory allocation holding the trap handler. /// @param [in] offset Offset in bytes into pGpuMemory where the trap handler shader code begins. Must be /// 256 byte aligned. virtual void BindTrapHandler( PipelineBindPoint pipelineType, IGpuMemory* pGpuMemory, gpusize offset) = 0; /// Binds the specified GPU memory location as a trap buffer for the specified pipeline type. This GPU memory will /// be available to the trap handler as scratch memory to use as it chooses. The same trap buffer will be installed /// for all shader stages that are part of the pipeline. /// /// There is no size parameter for the trap buffer. The client is responsible for ensuring that the trap handler /// only reads/writes data within the bounds designated for trap buffer usage. /// /// @param [in] pipelineType Select compute or graphics pipeline. If graphics, this trap buffer will be installed /// for _all_ hardware shader stages. /// @param [in] pGpuMemory GPU memory allocation holding the trap buffer range. /// @param [in] offset Offset in bytes into pGpuMemory where the trap buffer range starts. Must be 256 byte /// aligned. virtual void BindTrapBuffer( PipelineBindPoint pipelineType, IGpuMemory* pGpuMemory, gpusize offset) = 0; /// Get the swap chain information for creating a swap chain and presenting an image. /// /// @param [in] hDisplay Display handle of the local window system. /// @param [in] hWindow Window handle of the local window system. 
/// @param [in] wsiPlatform WSI Platform the swapchain is supposed to work on. /// @param [in,out] pSwapChainProperties Contains swap chain information. /// /// @returns Success if the swap chain information was successfully retrieved. Otherwise, one of the following errors may be /// returned: /// + ErrorUnknown if an unexpected internal error occurs. virtual Result GetSwapChainInfo( OsDisplayHandle hDisplay, OsWindowHandle hWindow, WsiPlatform wsiPlatform, SwapChainProperties* pSwapChainProperties) = 0; /// Determines if the given window system requirement is supported by the underlying wsiPlatform. /// /// @param [in] hDisplay Display handle of the local window system. /// @param [in] wsiPlatform WSI Platform the request is supposed to be sent to. /// @param [in] visualId Requested visual information which may not be needed for some wsiPlatforms. /// /// @returns Success if the request is supported. Otherwise, one of the following errors may be returned: /// + Unsupported virtual Result DeterminePresentationSupported( OsDisplayHandle hDisplay, WsiPlatform wsiPlatform, int64 visualId) = 0; /// Returns a mask of SwapChainModeSupport flags for each present mode. The swapchain modes are different for each /// WsiPlatform. /// /// @param [in] wsiPlatform WSI Platform the swapchain is supposed to work on. /// @param [in] mode The swap chain will use this present mode. /// /// @returns Returns a mask of SwapChainModeSupport. virtual uint32 GetSupportedSwapChainModes( WsiPlatform wsiPlatform, PresentMode mode) const = 0; /// Returns a mask of SwapChainModeSupport flags for each present mode. The swapchain modes are different for each /// WsiPlatform. /// /// @param [in] wsiPlatform WSI Platform the swapchain is supposed to work on. /// @param [in] mode The swap chain will use this present mode. /// /// @returns Returns a mask of SwapChainModeSupport. /// /// @todo NOTE(review): this comment block is a verbatim copy of the GetSupportedSwapChainModes() documentation, but it /// precedes GetCompatiblePresentModes(), whose mode parameter is a SwapChainMode. Presumably it should describe /// a mask of present modes compatible with the given swap chain mode - confirm and reword.
virtual uint32 GetCompatiblePresentModes( WsiPlatform wsiPlatform, SwapChainMode mode) const = 0; /// Determines if the given information corresponds to an external shared image. /// /// Some clients may not know if a given external shared resource is a simple GPU memory allocation or an image; it /// is expected they will call this function to determine which set of open functions they must call. /// /// @param [in] openInfo The open info describing the external shared resource. /// @param [out] pIsImage Its contents will be set to true if the external shared resource is an image. /// /// @returns Success if PAL was able to determine whether or not the resource is an image. Otherwise, one of the /// following errors may be returned: /// + ErrorInvalidPointer if pIsImage is null. /// + ErrorUnknown if an unexpected internal error occurs. virtual Result DetermineExternalSharedResourceType( const ExternalResourceOpenInfo& openInfo, bool* pIsImage) const = 0; /// @name FactoryMethods Device Factory Methods /// /// The following set of IDevice methods is the interface through which almost all PAL objects are created. /// /// PAL does not allocate its own system memory for these objects. Instead, the client must query the amount of /// system memory required for the object then provide a pointer where PAL will construct the object. /// /// This approach allows the client to roll the PAL object into its own allocations without unnecessary heap /// allocations and cache misses. It can also allocate many objects in a single memory space without PAL /// involvement. /// /// @{ /// Determines the amount of system memory required for a queue object. An allocation of this amount of memory /// must be provided in the pPlacementAddr parameter of CreateQueue(). /// /// @param [in] createInfo Properties of the new queue such as engine type and engine index. /// @param [out] pResult The validation result if pResult is non-null. 
This argument can be null to avoid the /// additional validation. /// /// @returns Size, in bytes, of system memory required for an IQueue object with the specified properties. /// A return value of 0 indicates the createInfo was invalid. virtual size_t GetQueueSize( const QueueCreateInfo& createInfo, Result* pResult) const = 0; /// Creates a queue object. /// /// @param [in] createInfo Properties of the new queue such as engine type and engine index. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetQueueSize() with the same /// create info. /// @param [out] ppQueue Constructed queue object. When successful, the returned address will be the same /// as specified in pPlacementAddr. /// /// @returns Success if the queue was successfully created. Otherwise, one of the following errors may be returned: /// + ErrorInvalidPointer if pPlacementAddr or ppQueue is null. /// + ErrorInvalidValue if the create info's engineType is invalid or if the engineIndex is invalid. virtual Result CreateQueue( const QueueCreateInfo& createInfo, void* pPlacementAddr, IQueue** ppQueue) = 0; /// Determines the amount of system memory required for a multi-queue object. An allocation of this amount of /// memory must be provided in the pPlacementAddr parameter of CreateMultiQueue(). /// /// @param [in] queueCount Number of queues in the gang; matches number of entries in pCreateInfo. /// @param [in] pCreateInfo Properties of each queue to create for this gang (engine type, etc.). The first /// entry in this array describes the master queue which will be used to execute all /// IQueue interfaces except for MultiSubmit(). /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. 
/// /// @returns Size, in bytes, of system memory required for an multi-queue IQueue object with the specified /// properties. A return value of 0 indicates the createInfo was invalid. virtual size_t GetMultiQueueSize( uint32 queueCount, const QueueCreateInfo* pCreateInfo, Result* pResult) const = 0; /// Creates a multi-queue (i.e., gang submission queue) object. The resulting version of the IQueue interface /// is composed of multiple hardware queues which can be atomically submitted to as a group. When this is done, /// it is safe to use IGpuEvent objects to tightly synchronize work done across queues in a single call to Submit(). /// This can allow the client to tightly schedule asynchronous workloads for maximum efficiency that isn't possible /// across queues using IQueueSemaphore objects. /// /// @param [in] queueCount Number of queues in the gang; matches number of entries in the pCreateInfo array. /// @param [in] pCreateInfo Properties of each queue to create for this gang (engine type, etc.). The first /// entry in this array describes the master queue which will be used to execute all /// IQueue interfaces except for the ganged-portion of a Submit() (e.g., Present()). /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetMultiQueueSize() with the same /// arguments. /// @param [out] ppQueue Constructed multi queue object. /// /// @returns Success if the multi queue was successfully created. Otherwise, one of the following errors may be /// returned: /// + ErrorInvalidValue if queueCount is less than 2. /// + ErrorInvalidQueueType if any of the created sub-queues are not multi-queue compatible. This is /// indicated by the supportsMultiQueue engineProperties flag in @ref DeviceProperties. /// + ErrorInvalidPointer if pCreateInfo, pPlacementAddr or ppQueue is null. /// + ErrorInvalidValue if any create info's configuration is invalid. 
virtual Result CreateMultiQueue( uint32 queueCount, const QueueCreateInfo* pCreateInfo, void* pPlacementAddr, IQueue** ppQueue) = 0; /// Determines the amount of system memory required for a GPU memory object. /// /// An allocation of this amount of memory must be provided in the pPlacementAddr parameter of CreateGpuMemory(). /// /// @param [in] createInfo Data controlling the GPU memory properties, such as size, alignment, and allowed heaps. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. /// /// @returns Size, in bytes, of system memory required for an IGpuMemory object with the specified properties. A /// return value of 0 indicates the createInfo was invalid. virtual size_t GetGpuMemorySize( const GpuMemoryCreateInfo& createInfo, Result* pResult) const = 0; /// Creates an @ref IGpuMemory object with the requested properties. /// /// This method can create either _real_ or _virtual_ GPU memory allocations. /// /// @param [in] createInfo Data controlling the GPU memory properties, such as size, alignment, and allowed /// heaps. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetGpuMemorySize() with the same /// createInfo param. /// @param [out] ppGpuMemory Constructed GPU memory object. When successful, the returned address will be the /// same as specified in pPlacementAddr. /// /// @returns Success if the GPU memory was successfully created. Otherwise, one of the following errors may be /// returned: /// + TooManyFlippableAllocations if the GPU memory was successfully created, but the client has reached /// the limit of flippable allocations for this Device. This is a warning that future flippable GPU /// memory may fail to be created due to internal OS limitations. /// + ErrorInvalidPointer if pPlacementAddr or ppGpuMemory is null. 
/// + ErrorInvalidMemorySize if createInfo.size is invalid. /// + ErrorInvalidAlignment if createInfo.alignment is invalid. /// + ErrorInvalidValue if createInfo.heapCount is 0 for real allocations or non-0 for virtual allocations. /// + ErrorOutOfGpuMemory if the allocation failed due to a lack of GPU memory. /// + ErrorUnavailable if the Reserve Gpu Virtual Address failed. virtual Result CreateGpuMemory( const GpuMemoryCreateInfo& createInfo, void* pPlacementAddr, IGpuMemory** ppGpuMemory) = 0; /// Determines the amount of system memory required for a pinned GPU memory object. /// /// An allocation of this amount of memory must be provided in the pPlacementAddr parameter of /// CreatePinnedGpuMemory(). /// /// @param [in] createInfo Data controlling the GPU memory properties, such as size and the allocation to pin. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. /// /// @returns Size, in bytes, of system memory required for an IGpuMemory object pinned with the specified /// properties. virtual size_t GetPinnedGpuMemorySize( const PinnedGpuMemoryCreateInfo& createInfo, Result* pResult) const = 0; /// Pins a segment of system memory in place and create an @ref IGpuMemory object allowing access by the GPU. /// /// @param [in] createInfo Data controlling the GPU memory properties, such as size and the allocation to pin. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetPinnedGpuMemorySize() with the /// same params. /// @param [out] ppGpuMemory Constructed GPU memory object. When successful, the returned address will be the /// same as specified in pPlacementAddr. /// /// @returns Success if the system memory was successfully pinned and a corresponding GPU memory object was created. 
/// Otherwise, one of the following errors may be returned: /// + ErrorInvalidPointer if createInfo.pSysMem, pPlacementAddr, or ppGpuMemory is null, or if /// createInfo.pSysMem is not allocation granularity aligned. /// + ErrorInvalidMemorySize if createInfo.memSize is not allocation granularity aligned. /// + ErrorOutOfMemory if the creation failed because the system memory could not be pinned. virtual Result CreatePinnedGpuMemory( const PinnedGpuMemoryCreateInfo& createInfo, void* pPlacementAddr, IGpuMemory** ppGpuMemory) = 0; /// Determines the amount of system memory required for a SVM memory object /// /// An allocation of this amount of memory must be provided in the pPlacementAddr parameter of /// CreateSvmGpuMemory(). /// /// @param [in] createInfo Data controlling the SVM memory properties, such as size and type of SVM buffer. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. /// /// @returns Size, in bytes, of system memory required for an IGpuMemory object with the specified properties. virtual size_t GetSvmGpuMemorySize( const SvmGpuMemoryCreateInfo& createInfo, Result* pResult) const = 0; /// Creates an SVM (Shared Virtual Memory) IGpuMemory object. /// The basic idea of SVM is to create system memory that has the same CPU and GPU virtual address /// (i.e., "pointer is a pointer"). This can work in two modes: fine-grain, or coarse-grain. /// /// Fine-grain (Single-GPU): The client should just call this function with pReservedGpuVaOwner set to null. /// PAL will allocate GPU-accessible system memory that will have the same CPU virtual address /// (as returned by IGpuMemory::Map()) as GPU virtual address /// (as returned in the gpuVirtAddr value returned by IGpuMemory::Desc()). 
/// /// Fine-grain (MGPU): The client can call this function with pReservedGpuVaOwner set to the IGpuMemory object /// allocated on the first device and receive a mapping to the same GPU VA location on another device. /// /// Coarse-grain: In this mode, there are actually two separate IGpuMemory objects. /// The client should first create the "local" GPU memory object by calling IDevice::CreateGpuMemory() to create /// a standard GPU memory object in the VaRange::Svm VA space. Next, the client should create the "staging" GPU /// memory by calling this function (CreateSvmGpuMemory) with pReservedGpuVaOwner pointing to the "local" GPU /// memory object. PAL will create system memory for the "staging" GPU memory with a CPU virtual address matching /// the "local" GPU memory's GPU virtual address. The GPU virtual address of the "staging" GPU memory is /// arbitrarily assigned. The client is responsible for managing the contents of the two related allocations /// per their API rules. /// /// @param [in] createInfo Data controlling the SVM memory properties, such as size and location of SVM buffer. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetSvmGpuMemorySize() with the /// same params. /// @param [out] ppGpuMemory Constructed GPU memory object. When successful, the returned address will be the /// same as specified in pPlacementAddr. /// /// @returns Success if the SVM buffer was successfully created and a corresponding GPU memory object was created. /// Otherwise, one of the following errors may be returned: /// + ErrorInvalidPointer if pPlacementAddr or ppGpuMemory is null. /// + ErrorInvalidMemorySize if createInfo.memSize is not allocation granularity aligned. /// + ErrorOutOfMemory if the creation failed because there is not enough GPU memory /// or the system memory could not be pinned.
virtual Result CreateSvmGpuMemory( const SvmGpuMemoryCreateInfo& createInfo, void* pPlacementAddr, IGpuMemory** ppGpuMemory) = 0; /// Determines the amount of system memory required for a GPU memory object created by opening an allocation from a /// different GPU. /// /// An allocation of this amount of memory must be provided in the pPlacementAddr parameter of /// OpenSharedGpuMemory(). /// /// @param [in] openInfo Specifies a handle to a shared GPU memory object to open. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. /// /// @returns Size, in bytes, of system memory required for opening a shared IGpuMemory object with the specified /// properties. A return value of 0 indicates the openInfo was invalid. virtual size_t GetSharedGpuMemorySize( const GpuMemoryOpenInfo& openInfo, Result* pResult) const = 0; /// Opens a shareable GPU memory object created on another device for use on this device. /// /// @param [in] openInfo Specifies a handle to a shared GPU memory object to open. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetSharedGpuMemorySize() with the /// same params. /// @param [out] ppGpuMemory Constructed GPU memory object. When successful, the returned address will be the /// same as specified in pPlacementAddr. /// /// @returns Success if the shared memory was successfully opened for access on this device. Otherwise, one of the /// following errors may be returned: /// + ErrorInvalidPointer if pPlacementAddr or ppGpuMemory is null. /// + ErrorNotShareable if the specified memory object was not marked as shareable on creation. 
virtual Result OpenSharedGpuMemory( const GpuMemoryOpenInfo& openInfo, void* pPlacementAddr, IGpuMemory** ppGpuMemory) = 0; /// Determines the amount of system memory required for an external GPU memory object created by opening /// an allocation from a compatible device, such as a D3D device. /// /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. /// /// @returns Size, in bytes, of system memory required for opening a shared IGpuMemory object with the specified /// properties. A return value of 0 indicates the openInfo was invalid. virtual size_t GetExternalSharedGpuMemorySize( Result* pResult) const = 0; /// Opens an external shared memory object which is created by a compatible device, such as a D3D device. /// There could be more than one underlying allocation in the shared memory object; only the allocation /// created on the device's GPU will be opened, and other allocations will be ignored. /// /// @param [in] openInfo Open info. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetExternalSharedGpuMemorySize() /// with the same params. /// @param [out] pMemCreateInfo Return CreateInfo of the external shared GPU memory. /// @param [out] ppGpuMemory Constructed GPU memory object. When successful, the returned address will be the /// same as specified in pPlacementAddr. /// /// @returns Success if the shared memory was successfully opened for access on this device. Otherwise, one of the /// following errors may be returned: /// + ErrorInvalidPointer if pPlacementAddr, pMemCreateInfo or ppGpuMemory is null. /// + ErrorNotShareable if none of the allocations in the shared memory object was created on the device's GPU.
virtual Result OpenExternalSharedGpuMemory( const ExternalGpuMemoryOpenInfo& openInfo, void* pPlacementAddr, GpuMemoryCreateInfo* pMemCreateInfo, IGpuMemory** ppGpuMemory) = 0; /// Determines the amount of system memory required for a proxy GPU memory object to a GPU memory object on a /// different GPU. An allocation of this amount of memory must be provided in the pPlacementAddr parameter of /// OpenPeerGpuMemory(). /// /// @param [in] openInfo Specifies a handle to a GPU memory object to open for peer-to-peer transfer access. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. /// /// @returns Size, in bytes, of system memory required for opening a peer IGpuMemory object with the specified /// properties. A return value of 0 indicates the openInfo was invalid. virtual size_t GetPeerGpuMemorySize( const PeerGpuMemoryOpenInfo& openInfo, Result* pResult) const = 0; /// Opens previously created GPU memory object for peer access on another device. /// /// @param [in] openInfo Specifies a handle to a shared GPU memory object to open. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetPeerGpuMemorySize() with the same /// params. /// @param [out] ppGpuMemory Constructed GPU memory object. When successful, the returned address will be the /// same as specified in pPlacementAddr. /// /// @returns Success if the memory was successfully opened for peer access on this device. Otherwise, one of the /// following errors may be returned: /// + ErrorInvalidPointer if pPlacementAddr, ppGpuMemory, or openInfo.pOriginalMem is null. virtual Result OpenPeerGpuMemory( const PeerGpuMemoryOpenInfo& openInfo, void* pPlacementAddr, IGpuMemory** ppGpuMemory) = 0; /// Determines the amount of system memory required for an image object. 
An allocation of this amount of memory /// must be provided in the pPlacementAddr parameter of CreateImage(). /// /// @param [in] createInfo Properties of the new image such as pixel format and dimensions. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. /// /// @returns Size, in bytes, of system memory required for an @ref IImage object with the specified properties. A /// return value of 0 indicates the createInfo was invalid. virtual size_t GetImageSize( const ImageCreateInfo& createInfo, Result* pResult) const = 0; /// Determines if the image will have faster clone copy (than normal CS or graphics copy) in @ref CmdCopyImage(). /// If this function returns true, clients can set @ref ImageCreateInfo::flags::cloneable = 1 to enable clone /// copy support in @ref CmdCopyImage(); however whether @ref CmdCopyImage() goes through clone copy or not depends /// on multiple factors; please see the documentation of @ref CmdCopyImage() for more details. /// /// Note that clients are free to set this flag to 1 regardless; it still enables clone copy potentially but can't /// guarantee better performance. If clients find more cases where clone copy is faster, feel free to improve this function /// to cover these kinds of images. /// /// Because the decision relies on @ref ImageCreateInfo values, clients should ideally set all other members /// of @ref ImageCreateInfo first, and then call this function to set up the cloneable flag last before creating the image. /// /// @param [in] createInfo Properties of the new image. /// /// @returns True if the image prefers clone copy in CmdCopyImage(); false otherwise (the normal copy path is preferred). virtual bool ImagePrefersCloneCopy( const ImageCreateInfo& createInfo) const = 0; /// Creates an @ref IImage object with the requested properties. /// /// @param [in] createInfo Properties of the new image such as pixel format and dimensions.
/// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetImageSize() with the same /// createInfo param. /// @param [out] ppImage Constructed image object. When successful, the returned address will be the same as /// specified in pPlacementAddr. /// /// @returns Success if the image was successfully created. Otherwise, one of the following errors may be returned: /// + ErrorInvalidValue if: /// - The image dimensions are invalid based on the image type. /// - The image dimensions are not properly aligned for compressed formats. /// - The number of samples is invalid for the image type and format. /// - MSAA is enabled for an image that doesn't support color or depth usage. /// - MSAA images have more than one mip level. /// - The array size is zero, non-1 for 3D images, or beyond the max number of slices for 1D or 2D /// images. /// - The number of mipmaps is invalid for the image dimensions. /// + ErrorInvalidPointer if pPlacementAddr or ppImage is null. /// + ErrorInvalidFormat if: /// - The format doesn't support the usage flags. /// - A 1D image specifies a compressed format. /// + ErrorInvalidFlags if: /// - The color target and depth/stencil usages are specified simultaneously. /// - The color target flag is set for a 1D image. /// - The depth/stencil flag is set for a non-2D image. virtual Result CreateImage( const ImageCreateInfo& createInfo, void* pPlacementAddr, IImage** ppImage) = 0; /// Determines the amount of system memory required for a presentable image object (and an associated memory /// object). Allocations of these amounts of memory must be provided in the pImagePlacementAddr and /// pGpuMemoryPlacementAddr parameters of CreatePresentableImage(). /// /// Only images created through this interface are valid sources for IQueue::Present(). 
/// /// @param [in] createInfo Properties of the image to create such as width/height and pixel format. /// @param [out] pImageSize Size, in bytes, of system memory required for the IImage. /// Should be specified to the pImagePlacementAddr argument of CreatePresentableImage(). /// @param [out] pGpuMemorySize Size, in bytes, of system memory required for a IGpuMemory object attached to the /// presentable IImage. Should be specified to the pGpuMemoryPlacementAddr argument /// of CreatePresentableImage(). /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. virtual void GetPresentableImageSizes( const PresentableImageCreateInfo& createInfo, size_t* pImageSize, size_t* pGpuMemorySize, Result* pResult) const = 0; /// Creates a presentable image. Presentable image must have internally bound GPU memory allocated as OS needs the /// information of image/memory via OS callbacks. /// /// @param [in] createInfo Properties of the image to create such as width/height and pixel format. /// @param [in] pImagePlacementAddr Pointer to the location where PAL should construct this object. There must /// be as much size available here as reported by calling /// GetPresentableImageSizes(). /// @param [in] pGpuMemoryPlacementAddr Pointer to the location where PAL should construct a IGpuMemory associated /// with this presentable image. There must be as much size available here as /// reported by calling GetPresentableImageSizes(). /// @param [out] ppImage Constructed image object. /// @param [out] ppGpuMemory Constructed memory object. This object is only valid for specifying in a /// memory reference list. It must be destroyed when the image is destroyed. /// /// @returns Success if the image was successfully created. 
///          Otherwise, one of the following errors may be returned:
///          + TooManyFlippableAllocations if the image was successfully created, but the client has reached the
///            limit of flippable allocations for this Device. This is a warning that future presentable Images
///            may fail to be created due to internal OS limitations.
///          + ErrorTooManyPresentableImages if the swap chain cannot be associated with more presentable images.
///          + ErrorInvalidPointer if pImagePlacementAddr, pGpuMemoryPlacementAddr, ppImage, or ppGpuMemory is null.
///          + ErrorInvalidValue if:
///              - The image dimensions are invalid.
///              - The refresh rate is invalid for a fullscreen image.
///          + ErrorInvalidFormat if the format doesn't support presentation.
virtual Result CreatePresentableImage(
    const PresentableImageCreateInfo& createInfo,
    void* pImagePlacementAddr,
    void* pGpuMemoryPlacementAddr,
    IImage** ppImage,
    IGpuMemory** ppGpuMemory) = 0;

/// Determines the amount of system memory required for an image object (and an associated memory object) opened for
/// peer access to an image created on another GPU. Allocations of these amounts of memory must be provided in the
/// pImagePlacementAddr and pGpuMemoryPlacementAddr parameters of OpenPeerImage().
///
/// @param [in]  openInfo           Specifies the image to be opened for peer access from another GPU.
/// @param [out] pPeerImageSize     Size, in bytes, of system memory required for a peer IImage. An allocation of
///                                 this size should be passed as the pImagePlacementAddr argument to
///                                 OpenPeerImage().
/// @param [out] pPeerGpuMemorySize Size, in bytes, of system memory required for a dummy IGpuMemory object attached
///                                 to a peer IImage. An allocation of this size should be passed as the
///                                 pGpuMemoryPlacementAddr argument to OpenPeerImage().
/// @param [out] pResult            The validation result if pResult is non-null. This argument can be null to avoid
///                                 the additional validation.
virtual void GetPeerImageSizes(
    const PeerImageOpenInfo& openInfo,
    size_t* pPeerImageSize,
    size_t* pPeerGpuMemorySize,
    Result* pResult) const = 0;

/// Creates an @ref IImage object as a proxy to an IImage on another GPU to be used for peer-to-peer transfers.
///
/// @note The @ref IImage object provided in the @ref PeerImageOpenInfo must be bound to an existing
///       @ref IGpuMemory object prior to opening a peer version of it.
///       If the new image needs to bind to an existing peer memory allocation, as when images are suballocated,
///       pGpuMemoryPlacementAddr must equal nullptr and ppGpuMemory must point to an existing IGpuMemory object
///       that was previously opened to reference the same memory from the current device.
///
/// @param [in]     openInfo                Specifies the image to be opened for peer access from another GPU.
/// @param [in]     pImagePlacementAddr     Pointer to the location where PAL should construct this object. There
///                                         must be as much size available here as reported by calling
///                                         GetPeerImageSizes().
/// @param [in]     pGpuMemoryPlacementAddr If non-null, this is a pointer to the location where PAL should
///                                         construct an IGpuMemory to be associated with this peer image. There
///                                         must be as much size available here as reported by calling
///                                         GetPeerImageSizes().
/// @param [out]    ppImage                 Constructed image object.
/// @param [in,out] ppGpuMemory             Constructed dummy memory object. This object is only valid for
///                                         specifying in a memory reference list. When pGpuMemoryPlacementAddr is
///                                         nullptr, this must instead point to an existing IGpuMemory object on
///                                         input, as described in the note above.
///
/// @returns Success if the image was successfully created. Otherwise, one of the following errors may be returned:
///          + ErrorInvalidPointer if pImagePlacementAddr, pGpuMemoryPlacementAddr, ppImage, ppGpuMemory, or
///            openInfo.pOriginalImage is null.
///
/// @note (review) The error list above names pGpuMemoryPlacementAddr even though the first note permits it to be
///       nullptr when binding to an existing peer allocation - confirm against the implementation.
virtual Result OpenPeerImage(
    const PeerImageOpenInfo& openInfo,
    void* pImagePlacementAddr,
    void* pGpuMemoryPlacementAddr,
    IImage** ppImage,
    IGpuMemory** ppGpuMemory) = 0;

/// Determines the amount of system memory required for an external shared image object (and an associated memory
/// object). Allocations of these amounts of memory must be provided in the pImagePlacementAddr and
/// pGpuMemoryPlacementAddr parameters of OpenExternalSharedImage().
///
/// @param [in]  openInfo       Specifies the external image to be opened.
/// @param [out] pImageSize     Size, in bytes, of system memory required for pImagePlacementAddr.
/// @param [out] pGpuMemorySize Size, in bytes, of system memory required for pGpuMemoryPlacementAddr.
/// @param [out] pImgCreateInfo If non-null, it will be filled out with information describing the shared image.
///
/// @returns Success if the shared image was successfully opened for access on this device. Otherwise, one of the
///          following errors may be returned:
///          + ErrorInvalidPointer if pImageSize or pGpuMemorySize is null.
///          + ErrorNotShareable if none of the allocations in the shared image is created on the device's GPU.
virtual Result GetExternalSharedImageSizes(
    const ExternalImageOpenInfo& openInfo,
    size_t* pImageSize,
    size_t* pGpuMemorySize,
    ImageCreateInfo* pImgCreateInfo) const = 0;

/// Opens an external shared image object which was created by a compatible device, such as a D3D device.
/// There could be more than one underlying allocation in the shared image object; only one allocation
/// created on the device's GPU will be opened, other allocations will be ignored.
///
/// @param [in]  openInfo                Specifies the external image to be opened.
/// @param [in]  pImagePlacementAddr     Pointer to the location where PAL should construct the image object.
///                                      There must be as much space available here as reported by calling
///                                      GetExternalSharedImageSizes() with the same params.
/// @param [in] pGpuMemoryPlacementAddr Pointer to the location where PAL should construct the GPU memory object. /// There must be as much space available here as reported by calling /// GetExternalSharedImageSizes() with the same params. /// @param [out] pMemCreateInfo If non-null, it is filled with information describing the external GPU /// memory that backs this external image. /// @param [out] ppImage Constructed image object. When successful, the returned address will /// be the same as specified in pImagePlacementAddr. /// @param [out] ppGpuMemory Constructed GPU memory object. When successful, the returned address will /// be the same as specified in pGpuMemoryPlacementAddr. /// @returns Success if the shared image was successfully opened for access on this device. Otherwise, one of the /// following errors may be returned: /// + ErrorInvalidPointer if pImagePlacementAddr, pGpuMemoryPlacementAddr, ppImage or ppGpuMemory is null. /// + ErrorNotShareable if none of allocations in the shared image object is created on the device's GPU. virtual Result OpenExternalSharedImage( const ExternalImageOpenInfo& openInfo, void* pImagePlacementAddr, void* pGpuMemoryPlacementAddr, GpuMemoryCreateInfo* pMemCreateInfo, IImage** ppImage, IGpuMemory** ppGpuMemory) = 0; /// Determines the amount of system memory required for a color target view object. An allocation of this amount of /// memory must be provided in the pPlacementAddr parameter of CreateColorTargetView(). /// /// Unlike most creation methods in this class, GetColorTargetViewSize() does not require a ColorTargetCreateInfo /// parameter. PAL must guarantee that all color target view objects are the same size in order to support DX12, /// where these views are treated similarly to SRDs. /// /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid /// the additional validation. 
/// /// @returns Size, in bytes, of system memory required for an IColorTargetView object. virtual size_t GetColorTargetViewSize( Result* pResult) const = 0; /// Creates an @ref IColorTargetView object with the requested properties. /// /// @param [in] createInfo Properties of the color target view to create. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetColorTargetViewSize(). /// @param [out] ppColorTargetView Constructed color target view object. When successful, the returned address will /// be the same as specified in pPlacementAddr. /// /// @returns Success if the color target view was successfully created. Otherwise, one of the following /// errors may be returned: /// + ErrorInvalidPointer if pPlacementAddr, ppColorTargetView, or createInfo.pImage is null. /// + ErrorInvalidValue if: /// - The base slice is invalid for the given image object and view type. /// - The number of array slices is zero or the range of slices is too large for the specified image. /// - The mip level is invalid for the given image object. /// + ErrorInvalidImage if the image object doesn't have the color target access flag set. virtual Result CreateColorTargetView( const ColorTargetViewCreateInfo& createInfo, void* pPlacementAddr, IColorTargetView** ppColorTargetView) const = 0; /// Determines the amount of system memory required for a depth/stencil view object. An allocation of this amount /// of memory must be provided in the pPlacementAddr parameter of CreateDepthStencilView(). /// /// Unlike most creation methods in this class, GetDepthStencilViewSize() does not require a /// DepthStencilViewCreateInfo parameter. PAL must guarantee that all color target view objects are the same size /// in order to support DX12, where these views are treated similarly to SRDs. /// /// @param [out] pResult The validation result if pResult is non-null. 
///                      This argument can be null to avoid the additional validation.
///
/// @returns Size, in bytes, of system memory required for an IDepthStencilView object.
virtual size_t GetDepthStencilViewSize(
    Result* pResult) const = 0;

/// Creates an @ref IDepthStencilView object with the requested properties.
///
/// @param [in]  createInfo         Properties of the depth/stencil view to create.
/// @param [in]  pPlacementAddr     Pointer to the location where PAL should construct this object. There must be
///                                 as much size available here as reported by calling GetDepthStencilViewSize().
/// @param [out] ppDepthStencilView Constructed depth/stencil view object. When successful, the returned address
///                                 will be the same as specified in pPlacementAddr.
///
/// @returns Success if the depth/stencil view was successfully created. Otherwise, one of the following errors may
///          be returned:
///          + ErrorInvalidPointer if pPlacementAddr, ppDepthStencilView, or createInfo.pImage is null.
///          + ErrorInvalidValue if:
///              - The base slice is invalid for the given image object and view type.
///              - The number of array slices is zero or the range of slices is too large for the specified image.
///              - The mip level is invalid for the given image object.
///          + ErrorInvalidImage if the image object doesn't have the depth/stencil target access flag set.
virtual Result CreateDepthStencilView(
    const DepthStencilViewCreateInfo& createInfo,
    void* pPlacementAddr,
    IDepthStencilView** ppDepthStencilView) const = 0;

/// Creates one or more typed buffer view _shader resource descriptors (SRDs)_ in memory provided by the client.
///
/// The client is responsible for providing _count_ times the amount of memory reported by srdSizes.typedBufferView
/// in DeviceProperties, and must also ensure the provided memory is aligned to the size of one SRD.
///
/// The SRD can be created in either system memory or pre-mapped GPU memory.
/// If updating GPU memory, the client must ensure there are no GPU accesses of this memory in flight before
/// calling this method.
///
/// The generated buffer view SRD allows a range of a GPU memory allocation to be accessed by a shader, and should
/// be set up based on shader usage as described in @ref BufferViewInfo. The client should put the resulting SRD
/// in an appropriate location based on the shader resource mapping specified by the bound pipeline, either directly
/// in user data (ICmdBuffer::CmdSetUserData()) or a table in GPU memory indirectly referenced by user data.
///
/// For performance reasons, this method returns void and does minimal error-checking. However, in debug builds,
/// to assist clients' debug efforts, the following conditions will be checked with runtime assertions:
///     + If pBufferViewInfo or pOut is null.
///     + If count is 0.
///     + If pBufferViewInfo[].format is Undefined.
///     + If pBufferViewInfo[].stride does not match the size of an element of that format.
///     + If pBufferViewInfo[].gpuAddr is 0.
///     + If pBufferViewInfo[].gpuAddr is not properly aligned to Min(4, pBufferViewInfo[].stride).
///
/// @param [in]  count           Number of buffer view SRDs to create; size of the pBufferViewInfo array.
/// @param [in]  pBufferViewInfo Array of buffer view descriptions directing SRD construction.
/// @param [out] pOut            Client-provided space where opaque, hardware-specific SRD data is written.
///
/// @ingroup ResourceBinding
void CreateTypedBufferViewSrds(
    uint32 count,
    const BufferViewInfo* pBufferViewInfo,
    void* pOut) const
{
    m_pfnTable.pfnCreateTypedBufViewSrds(this, count, pBufferViewInfo, pOut);
}

/// Creates one or more untyped buffer view _shader resource descriptors (SRDs)_ in memory provided by the client.
/// These SRDs can be accessed in a shader as either _raw_ or _structured_ views.
/// /// The client is responsible for providing _count_ times the amount of memory reported by srdSizes.untypedBufferView /// in DeviceProperties, and must also ensure the provided memory is aligned to the size of one SRD. /// /// The SRD can be created in either system memory or pre-mapped GPU memory. If updating GPU memory, the client /// must ensure there are no GPU accesses of this memory in flight before calling this method. /// /// The generated buffer view SRD allows a range of a GPU memory allocation to be accessed by a shader, and should /// be setup based on shader usage as described in @ref BufferViewInfo. The client should put the resulting SRD /// in an appropriate location based on the shader resource mapping specified by the bound pipeline, either directly /// in user data (ICmdBuffer::CmdSetUserData()) or a table in GPU memory indirectly referenced by user data. /// /// For performance reasons, this method returns void and does minimal error-checking. However, in debug builds, /// to assist clients' debug efforts, the following conditions will be checked with runtime assertions: /// + If pBufferViewInfo or pOut, is null. /// + If count is 0. /// + If pBufferViewInfo[].format is not Undefined. /// + If pBufferViewInfo[].gpuAddr is 0. /// + If pBufferViewInfo[].gpuAddr is not properly aligned to Min(4, pBufferViewInfo[].stride). /// /// @param [in] count Number of buffer view SRDs to create; size of the pBufferViewInfo array. /// @param [in] pBufferViewInfo Array of buffer view descriptions directing SRD construction. /// @param [out] pOut Client-provided space where opaque, hardware-specific SRD data is written. /// /// @ingroup ResourceBinding void CreateUntypedBufferViewSrds( uint32 count, const BufferViewInfo* pBufferViewInfo, void* pOut) const { m_pfnTable.pfnCreateUntypedBufViewSrds(this, count, pBufferViewInfo, pOut); } /// Validates image view SRD input parameters. 
/// Error checking for image view SRDs is handled by a separate function for performance reasons and to avoid
/// rechecking parameters that the client knows are correct when rebuilding SRDs.
///
/// @param [in] viewInfo Input image view SRD parameter info.
///
/// @returns Success if the parameters pass validation. Otherwise, one of the following errors may be returned:
///          + ErrorImagePlaneUnavailable if the requested image plane specified in the view is not available on
///            the image.
///          + ErrorImageNotShaderAccessible if the image does not have a shader-readable or shader-writable usage.
///          + ErrorInvalidFormatSwizzle if the view's channel swizzle specifies components not available in the view
///            format.
///          + ErrorInvalidBaseMipLevel if the view's start subresource has a mip level larger than the number of
///            available mip levels.
///          + ErrorFormatIncompatibleWithImageFormat if the view's format is not compatible with the image's format.
///            This can happen if:
///              - For color plane views, the bit-depths of the two formats are not equal.
///          + ErrorFormatIncompatibleWithImagePlane if the view's format is not compatible with the image's plane.
///            This can happen if:
///              - For depth plane views, the bit-depths of the view format and the depth component of the image
///                are not equal.
///              - For stencil plane views, the bit-depths of the view format and the stencil component of the image
///                are not equal.
///          + ErrorInvalidViewArraySize if:
///              - The view array size is 0.
///              - The image type is 3D and the view array size is not 1.
///          + ErrorViewTypeIncompatibleWithImageType if:
///              - The image type is 1D and the view type is not 1D.
///              - The image type is 2D and the view type is not 2D or cubemap.
///              - The image type is 3D and the view type is not 3D.
///          + ErrorInsufficientImageArraySize if the number of viewed array slices is more than available on
///            the image.
/// + ErrorCubemapIncompatibleWithMsaa if the view type is a cubemap view and the image has /// multiple samples. /// + ErrorCubemapNonSquareFaceSize if the view type is a cubemap view and the image 2D extents are not /// square. /// + ErrorInvalidViewBaseSlice /// - If the image type is 3D and the view base slice is not 0. /// /// @ingroup ResourceBinding virtual Result ValidateImageViewInfo(const ImageViewInfo& viewInfo) const = 0; /// Creates one or more image view _shader resource descriptors (SRDs)_ in memory provided by the client. /// /// The client is responsible for providing _count_ times the amount of memory reported by srdSizes.imageView /// in DeviceProperties, and must also ensure the provided memory is aligned to the size of one SRD. /// /// The SRD can be created in either system memory or pre-mapped GPU memory. If updating GPU memory, the client /// must ensure there are no GPU accesses of this memory in flight before calling this method. /// /// The generated image view SRD allows a set of subresources in an image to be accessed by a shader, and should /// be setup as described in @ref ImageViewInfo. The client should put the resulting SRD in an appropriate /// location based on the shader resource mapping specified by the bound pipeline, either directly in user data /// (ICmdBuffer::CmdSetUserData()) or a table in GPU memory indirectly referenced by user data. /// /// @warning SRDs for Planar YUV images will include padding if pImageViewInfo->subresRange.numSlices > 1 /// /// @param [in] count Number of buffer view SRDs to create; size of the pImageViewInfo array. /// @param [in] pImgViewInfo Array of image view descriptions directing SRD construction. /// @param [out] pOut Client-provided space where opaque, hardware-specific SRD data is written. 
/// /// @ingroup ResourceBinding void CreateImageViewSrds( uint32 count, const ImageViewInfo* pImgViewInfo, void* pOut) const { m_pfnTable.pfnCreateImageViewSrds(this, count, pImgViewInfo, pOut); } /// Validates an fmask view SRD input parameters. Error checking for fmask view SRDs is handled by a separate /// function for performance reasons and to avoid rechecking parameters that the client knows are correct when /// rebuilding SRDs. /// /// @param [in] viewInfo Input image view SRD parameter info. /// /// @returns Success if the parameters pass validation. Otherwise, one of the following errors may be returned: /// + ErrorImageFmaskUnavailable if the image does not have an FMask. /// + ErrorInvalidViewArraySize if the view array size is 0. /// + ErrorViewTypeIncompatibleWithImageType if the image type is not 2D. /// + ErrorInsufficientImageArraySize if the view base array slice and size define an out of bounds array range. /// /// @ingroup ResourceBinding virtual Result ValidateFmaskViewInfo(const FmaskViewInfo& viewInfo) const = 0; /// Creates one or more fmask view _shader resource descriptors (SRDs)_ in memory provided by the client. /// /// The client is responsible for providing _count_ times the amount of memory reported by srdSizes.fmaskView /// in DeviceProperties, and must also ensure the provided memory is aligned to the size of one SRD. /// /// The SRD can be created in either system memory or pre-mapped GPU memory. If updating GPU memory, the client /// must ensure there are no GPU accesses of this memory in flight before calling this method. /// /// The generated fmask view SRD allows a range of image slices to be accessed bo the load_fptr IL instruction, /// which allows a shader to read compressed MSAA data at the expense of a texture indirection. This SRD should be /// setup as described in @ref FmaskViewInfo. 
/// The client should put the resulting SRD in an appropriate location based on the shader resource mapping
/// specified by the bound pipeline, either directly in user data (ICmdBuffer::CmdSetUserData()) or a table in GPU
/// memory indirectly referenced by user data.
///
/// @param [in]  count          Number of fmask view SRDs to create; size of the pFmaskViewInfo array.
/// @param [in]  pFmaskViewInfo Array of fmask view descriptions directing SRD construction.
/// @param [out] pOut           Client-provided space where opaque, hardware-specific SRD data is written.
///
/// @ingroup ResourceBinding
void CreateFmaskViewSrds(
    uint32 count,
    const FmaskViewInfo* pFmaskViewInfo,
    void* pOut) const
{
    m_pfnTable.pfnCreateFmaskViewSrds(this, count, pFmaskViewInfo, pOut);
}

/// Validates sampler SRD input parameters. Error checking for sampler SRDs is handled by a separate function for
/// performance reasons and to avoid rechecking parameters that the client knows are correct when rebuilding SRDs.
///
/// @param [in] samplerInfo Input sampler SRD parameter info.
///
/// @returns Success if the parameters pass validation. Otherwise, one of the following errors may be returned:
///          + ErrorInvalidValue if:
///              - The max anisotropy or LOD bias value is outside of the legal range.
///              - The min/max LOD values are outside the legal range or if the max LOD is smaller than the min LOD.
///              - The border color palette index is out of the legal range.
///
/// @ingroup ResourceBinding
virtual Result ValidateSamplerInfo(const SamplerInfo& samplerInfo) const = 0;

/// Creates one or more sampler _shader resource descriptors (SRDs)_ in memory provided by the client.
///
/// The client is responsible for providing _count_ times the amount of memory reported by srdSizes.sampler in
/// DeviceProperties, and must also ensure the provided memory is aligned to the size of one SRD.
///
/// The SRD can be created in either system memory or pre-mapped GPU memory.
/// If updating GPU memory, the client must ensure there are no GPU accesses of this memory in flight before
/// calling this method.
///
/// The generated sampler SRD controls execution of sample instructions in a shader, and should be set up as
/// described in @ref SamplerInfo. The client should put the resulting SRD in an appropriate location based on the
/// shader resource mapping specified by the bound pipeline, either directly in user data
/// (ICmdBuffer::CmdSetUserData()) or a table in GPU memory indirectly referenced by user data.
///
/// @param [in]  count        Number of sampler SRDs to create; size of the pSamplerInfo array.
/// @param [in]  pSamplerInfo Array of sampler descriptions directing SRD construction.
/// @param [out] pOut         Client-provided space where opaque, hardware-specific SRD data is written.
///
/// @note This method returns void, so no status is reported to the caller. pSamplerInfo and pOut are expected to
///       be non-null. The following conditions are reported as ErrorInvalidValue by ValidateSamplerInfo(), which
///       can be called beforehand to check the sampler parameters:
///           - The max anisotropy or LOD bias value is outside of the legal range.
///           - The min/max LOD values are outside the legal range or if the max LOD is smaller than the min LOD.
///           - The border color palette index is out of the legal range.
///
/// @ingroup ResourceBinding
void CreateSamplerSrds(
    uint32 count,
    const SamplerInfo* pSamplerInfo,
    void* pOut) const
{
    m_pfnTable.pfnCreateSamplerSrds(this, count, pSamplerInfo, pOut);
}

/// Creates one or more _BVH resource descriptors (SRDs)_ in memory provided by the client.
///
/// The client is responsible for providing _count_ times the amount of memory reported by srdSizes.bvhInfo in
/// DeviceProperties, and must also ensure the provided memory is aligned to the size of one SRD.
///
/// The SRD can be created in either system memory or pre-mapped GPU memory. If updating GPU memory, the client
/// must ensure there are no GPU accesses of this memory in flight before calling this method.
/// /// The generated BVH SRD controls execution of ray trace instructions in a shader, and should be set up as /// described in @ref BvhInfo. The client should put the resulting SRD in an appropriate location based on /// the shader resource mapping specified by the bound pipeline, either directly in user data /// (ICmdBuffer::CmdSetUserData()) or a table in GPU memory indirectly referenced by user data. /// /// @param [in] count Number of BVH SRDs to create; size of the pBvhInfo array. /// @param [in] pBvhInfo Array of BVH (bounding volume hierarchy) descriptions directing SRD construction. /// @param [out] pOut Client-provided space where opaque, hardware-specific SRD data is written. /// /// @returns Success if the BVH SRD data was successfully written to pOut. Otherwise, one of the following /// errors may be returned: /// + ErrorInvalidPointer if pBvhInfo or pOut is null. /// /// @ingroup ResourceBinding void CreateBvhSrds( uint32 count, const BvhInfo* pBvhInfo, void* pOut) const { m_pfnTable.pfnCreateBvhSrds(this, count, pBvhInfo, pOut); } /// Decode a buffer SRD back into most of the BufferViewInfo used to create it via Create{Typed,Untyped}BufferViewSrds. /// /// @param [in] pBufferViewSrd Pointer to the SRD. Should not be in dedicated GPU memory. /// @param [out] pViewInfo Pointer to memory to be filled with decoded info. void DecodeBufferViewSrd( const void* pBufferViewSrd, BufferViewInfo* pViewInfo) const { m_pfnTable.pfnDecodeBufferViewSrd(this, pBufferViewSrd, pViewInfo); } /// Decode an image SRD back into the main parameters used to create it via CreateImageViewSrds(). /// /// @param [in] image The same image the SRD was created on. /// @param [in] pImageViewSrd Pointer to the SRD. Should not be in dedicated GPU memory. /// @param [out] pDecodedInfo Pointer to memory to be filled with decoded info.
void DecodeImageViewSrd( const IImage& image, const void* pImageViewSrd, DecodedImageSrd* pDecodedInfo) const { m_pfnTable.pfnDecodeImageViewSrd(this, &image, pImageViewSrd, pDecodedInfo); } /// The MSAA sample pattern palette is a client-managed table of sample patterns that might be in use by the app. /// /// The only purpose of this palette is to implement the samplepos shader instruction. This instruction returns the /// position of a particular sample based on the sample pattern of the current rasterizer state or a particular /// specified resource. When this instruction is executed, the shader will determine the correct palette index /// as specified in the pipeline (see samplePatternIdx in the rsState structure inside GraphicsPipelineCreateInfo) /// or in the image view SRD (see samplePatternIdx in ImageViewInfo). The shader will then return the position /// for the specified sample in the specified entry of the currently bound sample pattern palette as set with this /// function. /// /// The initial bound sample pattern palette values are undefined. A palette entry must be specified before it is /// referenced by a samplepos instruction. /// /// @warning The samplepos instruction and sample pattern palette both assume every pixel has the same pattern. /// This may not be accurate if the application uses custom sample patterns that aren't identical for /// every pixel in the quad. /// /// @param [in] palette Small set of sample patterns. Each sample pattern consists of an x,y tuple per sample /// describing where it is located in a pixel. The coordinate system is described in /// MsaaStateCreateInfo. /// /// @returns Success if the palette was successfully updated. Failure is only possible due to an internal error, /// such as a failure to allocate GPU memory for a new table. virtual Result SetSamplePatternPalette( const SamplePatternPalette& palette) = 0; /// Determines the amount of system memory required for a border color palette object.
An allocation of this amount /// of memory must be provided in the pPlacementAddr parameter of CreateBorderColorPalette(). /// /// @param [in] createInfo Border color palette creation info (specifies number of entries). /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. /// /// @returns Size, in bytes, of system memory required for an IBorderColorPalette object with the specified /// properties. A return value of 0 indicates the createInfo was invalid. virtual size_t GetBorderColorPaletteSize( const BorderColorPaletteCreateInfo& createInfo, Result* pResult) const = 0; /// Creates a border color palette object. /// /// @param [in] createInfo Border color palette creation info (specifies number of entries). /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetBorderColorPaletteSize() with the /// same createInfo param. /// @param [out] ppPalette Constructed border color palette object. When successful, the returned address will /// be the same as specified in pPlacementAddr. /// /// @returns Success if the border color palette was successfully created. Otherwise, one of the following errors /// may be returned: /// + ErrorInvalidPointer if pPlacementAddr or ppPalette is null. /// + ErrorInvalidValue if the palette size is too large to be used on any queue on this device. virtual Result CreateBorderColorPalette( const BorderColorPaletteCreateInfo& createInfo, void* pPlacementAddr, IBorderColorPalette** ppPalette) const = 0; /// Determines the amount of system memory required for a compute pipeline object. An allocation of this amount of /// memory must be provided in the pPlacementAddr parameter of CreateComputePipeline(). /// /// @param [in] createInfo Pipeline properties including shaders and descriptor set mappings.
/// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. /// /// @returns Size, in bytes, of system memory required for an IPipeline object with the specified properties. A /// return value of 0 indicates the createInfo was invalid. virtual size_t GetComputePipelineSize( const ComputePipelineCreateInfo& createInfo, Result* pResult) const = 0; /// Creates a compute @ref IPipeline object with the requested properties. /// /// @param [in] createInfo Pipeline properties including shaders and descriptor set mappings. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetComputePipelineSize() with the /// same createInfo param. /// @param [out] ppPipeline Constructed pipeline object. When successful, the returned address will be the /// same as specified in pPlacementAddr. /// /// @returns Success if the pipeline was successfully created. Otherwise, one of the following errors may be /// returned: /// + ErrorInvalidPointer if: /// - pPlacementAddr or ppPipeline is null. /// - A required shader pointer is null. /// - The link time constant data pointer is null. /// + ErrorInvalidValue if: /// - The link constant buffer info pointer isn't consistent with the link constant buffer count value. /// - The dynamic memory view mapping slot object type is not unused, resource, or UAV. /// + ErrorUnsupportedShaderIlVersion if an incorrect shader type is used in any shader stage. virtual Result CreateComputePipeline( const ComputePipelineCreateInfo& createInfo, void* pPlacementAddr, IPipeline** ppPipeline) = 0; /// Determines the amount of system memory required for a shader library object. An allocation of this amount of /// memory must be provided in the pPlacementAddr parameter of CreateShaderLibrary().
/// /// @param [in] createInfo Library creation parameters including ELF code object and other items. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. /// /// @returns Size, in bytes, of system memory required for an IShaderLibrary object with the specified properties. /// A return value of zero indicates the createInfo was invalid. virtual size_t GetShaderLibrarySize( const ShaderLibraryCreateInfo& createInfo, Result* pResult) const = 0; /// Creates a @ref IShaderLibrary object with the requested properties. /// /// @param [in] createInfo Library creation parameters including ELF code object and other items. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetShaderLibrarySize() with the /// same createInfo parameter. /// @param [out] ppLibrary Constructed library object. When successful, the returned address will be the same /// as specified in pPlacementAddr. /// /// @returns Success if the library was successfully created. Otherwise, one of the following errors may be /// returned: /// + ErrorInvalidPointer if: /// - pPlacementAddr or ppLibrary is null. /// - Required code object pointer is null. virtual Result CreateShaderLibrary( const ShaderLibraryCreateInfo& createInfo, void* pPlacementAddr, IShaderLibrary** ppLibrary) = 0; /// Determines the amount of system memory required for a graphics pipeline object. An allocation of this amount of /// memory must be provided in the pPlacementAddr parameter of CreateGraphicsPipeline(). /// /// @param [in] createInfo Pipeline properties including shaders and descriptor set mappings. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid /// the additional validation.
/// /// @returns Size, in bytes, of system memory required for an IPipeline object with the specified properties. A /// return value of 0 indicates the createInfo was invalid. virtual size_t GetGraphicsPipelineSize( const GraphicsPipelineCreateInfo& createInfo, Result* pResult) const = 0; /// Creates a graphics @ref IPipeline object with the requested properties. /// If this pipeline is created from a set of @ref IShaderLibrary objects, it is the caller's responsibility to ensure /// that the lifetime of the pipeline does not exceed the lifetime of the libraries themselves. /// /// @param [in] createInfo Pipeline properties including shaders and descriptor set mappings. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetGraphicsPipelineSize() with the /// same createInfo param. /// @param [out] ppPipeline Constructed pipeline object. When successful, the returned address will be the same /// as specified in pPlacementAddr. /// /// @returns Success if the pipeline was successfully created. Otherwise, one of the following errors may be /// returned: /// + ErrorInvalidPointer if: /// - pPlacementAddr or ppPipeline is null. /// - A required shader pointer is null. /// - The link time constant data pointer is null. /// + ErrorInvalidValue if: /// - The number of control points is invalid for a tessellation pipeline. /// - Logic operations are enabled while some of the color targets enable blending. /// - The dual source blend enable doesn't match expectations for color target and blend enable setup. /// - The link constant buffer info pointer isn't consistent with the link constant buffer count value. /// - The dynamic memory view mapping slot object type is not unused, resource, or UAV. /// + ErrorInvalidFormat if: /// - Blending is enabled but the color target format doesn't support blending.
/// - Logic operations are enabled but an incompatible format is used. /// + ErrorUnsupportedShaderIlVersion if an incorrect shader type is used in any shader stage. virtual Result CreateGraphicsPipeline( const GraphicsPipelineCreateInfo& createInfo, void* pPlacementAddr, IPipeline** ppPipeline) = 0; /// Determines the amount of system memory required for a MSAA state object. An allocation of this amount of memory /// must be provided in the pPlacementAddr parameter of CreateMsaaState(). /// /// @returns Size, in bytes, of system memory required for an @ref IMsaaState object. /// This value will always be non-zero if the device has GfxIp support. virtual size_t GetMsaaStateSize() const = 0; /// Creates an @ref IMsaaState object with the requested properties. /// /// @param [in] createInfo Properties of the MSAA state object to create. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetMsaaStateSize(). /// @param [out] ppMsaaState Constructed MSAA state object. When successful, the returned address will be the /// same as specified in pPlacementAddr. /// /// @returns Success if the MSAA state was successfully created. Otherwise, one of the following errors may be /// returned: /// + ErrorInvalidPointer if pPlacementAddr or ppMsaaState is null. /// + ErrorInvalidValue if: /// - The number of samples is unsupported. virtual Result CreateMsaaState( const MsaaStateCreateInfo& createInfo, void* pPlacementAddr, IMsaaState** ppMsaaState) const = 0; /// Determines the amount of system memory required for a color blend state object. An allocation of this amount of /// memory must be provided in the pPlacementAddr parameter of CreateColorBlendState(). /// /// @returns Size, in bytes, of system memory required for an @ref IColorBlendState object. /// This value will always be non-zero if the device has GfxIp support.
virtual size_t GetColorBlendStateSize() const = 0; /// Creates an @ref IColorBlendState object with the requested properties. /// /// @param [in] createInfo Properties of the color blend state object to create. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetColorBlendStateSize(). /// @param [out] ppColorBlendState Constructed color blend state object. When successful, the returned address will /// be the same as specified in pPlacementAddr. /// /// @returns Success if the color blend state was successfully created. Otherwise, one of the following errors may /// be returned: /// + ErrorInvalidPointer if pPlacementAddr or ppColorBlendState is null. /// + ErrorInvalidValue if: /// - An unsupported blend function is used with dual source blending. virtual Result CreateColorBlendState( const ColorBlendStateCreateInfo& createInfo, void* pPlacementAddr, IColorBlendState** ppColorBlendState) const = 0; /// Determines the amount of system memory required for a depth/stencil state object. An allocation of this amount /// of memory must be provided in the pPlacementAddr parameter of CreateDepthStencilState(). /// /// @returns Size, in bytes, of system memory required for an @ref IDepthStencilState object. /// This value will always be non-zero if the device has GfxIp support. virtual size_t GetDepthStencilStateSize() const = 0; /// Creates an @ref IDepthStencilState object with the requested properties. /// /// @param [in] createInfo Properties of the depth/stencil state object to create. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be /// as much size available here as reported by calling GetDepthStencilStateSize(). /// @param [out] ppDepthStencilState Constructed depth/stencil state object.
When successful, the returned address /// will be the same as specified in pPlacementAddr. /// /// @returns Success if the depth/stencil state was successfully created. Otherwise, one of the following errors /// may be returned: /// + ErrorInvalidPointer if pPlacementAddr or ppDepthStencilState is null. /// + ErrorInvalidValue if: /// - Depth bounds is enabled and the depth range is invalid. virtual Result CreateDepthStencilState( const DepthStencilStateCreateInfo& createInfo, void* pPlacementAddr, IDepthStencilState** ppDepthStencilState) const = 0; /// Determines the amount of system memory required for a queue semaphore object. An allocation of this amount of /// memory must be provided in the pPlacementAddr parameter of CreateQueueSemaphore(). /// /// @param [in] createInfo Data controlling the queue semaphore properties, such as an initial semaphore count. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. /// /// @returns Size, in bytes, of system memory required for an IQueueSemaphore object with the specified properties. /// A return value of 0 indicates the createInfo was invalid. virtual size_t GetQueueSemaphoreSize( const QueueSemaphoreCreateInfo& createInfo, Result* pResult) const = 0; /// Creates an @ref IQueueSemaphore object with the requested properties. /// /// @param [in] createInfo Data controlling the queue semaphore properties, such as an initial semaphore /// count. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetQueueSemaphoreSize() with the /// same createInfo param. /// @param [out] ppQueueSemaphore Constructed queue semaphore object. When successful, the returned address will be /// the same as specified in pPlacementAddr. /// /// @returns Success if the queue semaphore was successfully created.
Otherwise, one of the following errors may be /// returned: /// + ErrorInvalidPointer if pPlacementAddr or ppQueueSemaphore is null. /// + ErrorInvalidValue if createInfo.initialCount is outside of the [0..31] range. virtual Result CreateQueueSemaphore( const QueueSemaphoreCreateInfo& createInfo, void* pPlacementAddr, IQueueSemaphore** ppQueueSemaphore) = 0; /// Determines the amount of system memory required for a queue semaphore object created by opening a semaphore /// from a different device. An allocation of this amount of memory must be provided in the pPlacementAddr /// parameter of OpenSharedQueueSemaphore(). /// /// @param [in] openInfo Specifies a handle to a shared queue semaphore object to open. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. /// /// @returns Size, in bytes, of system memory required for opening a shared IQueueSemaphore object with the /// specified properties. A return value of 0 indicates the openInfo was invalid. virtual size_t GetSharedQueueSemaphoreSize( const QueueSemaphoreOpenInfo& openInfo, Result* pResult) const = 0; /// Opens a shareable queue semaphore object created on another device for use on this device. /// /// @param [in] openInfo Specifies a handle to a queue semaphore memory object to open. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetSharedQueueSemaphoreSize() with /// the same params. /// @param [out] ppQueueSemaphore Constructed queue semaphore object. When successful, the returned address will be /// the same as specified in pPlacementAddr. /// /// @returns Success if the shared semaphore was successfully opened for access on this device. Otherwise, one of /// the following errors may be returned: /// + ErrorInvalidPointer if pPlacementAddr or ppQueueSemaphore is null.
/// + ErrorNotShareable if the specified queue semaphore object was not marked as shareable on creation. virtual Result OpenSharedQueueSemaphore( const QueueSemaphoreOpenInfo& openInfo, void* pPlacementAddr, IQueueSemaphore** ppQueueSemaphore) = 0; /// Determines the amount of system memory required for a queue semaphore object created by opening a semaphore from /// a different API which isn't a PAL client. An allocation of this amount of memory must be provided in the /// pPlacementAddr parameter of OpenExternalSharedQueueSemaphore(). /// /// @param [in] openInfo Specifies a handle to a shared queue semaphore object to open. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid /// the additional validation. /// /// @returns Size, in bytes, of system memory required for opening a shared IQueueSemaphore object with the /// specified properties. A return value of 0 indicates the openInfo was invalid. virtual size_t GetExternalSharedQueueSemaphoreSize( const ExternalQueueSemaphoreOpenInfo& openInfo, Result* pResult) const = 0; /// Opens a shareable queue semaphore object created on another API which isn't a PAL client for use on this device. /// /// @param [in] openInfo Specifies a handle to a queue semaphore memory object to open and flags. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetExternalSharedQueueSemaphoreSize() with /// the same params. /// @param [out] ppQueueSemaphore Constructed queue semaphore object. When successful, the returned address will be /// the same as specified in pPlacementAddr. /// /// @returns Success if the shared semaphore was successfully opened for access on this device. Otherwise, one of /// the following errors may be returned: /// + ErrorInvalidPointer if pPlacementAddr or ppQueueSemaphore is null.
virtual Result OpenExternalSharedQueueSemaphore( const ExternalQueueSemaphoreOpenInfo& openInfo, void* pPlacementAddr, IQueueSemaphore** ppQueueSemaphore) = 0; #if PAL_KMT_BUILD /// Opens an external NT handle from name. /// /// @param [in] handleInfo Specifies the NT handle attributes when opening from name. /// @param [out] pHandle An NT handle returned by runtime. /// /// @returns Success if the NT handle was successfully opened. Otherwise, one of /// the following errors may be returned: /// + ErrorInvalidValue if the name or attributes are invalid. virtual Result OpenExternalHandleFromName( const ExternalHandleInfo& handleInfo, OsExternalHandle* pHandle) = 0; #endif #if (PAL_KMT_BUILD || PAL_AMDGPU_BUILD) /// Query current gpu memory usage info and budget info of specified heap group of the device. /// /// @param [out] pInfo Heap usage and budget info reported from the OS. virtual Result QueryGpuMemoryBudgetInfo( GpuMemoryBudgetInfo* pInfo) = 0; #endif /// Determines the amount of system memory required for an IFence object. An allocation of this amount of memory /// must be provided in the pPlacementAddr parameter of CreateFence(). /// /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid /// the additional validation. /// /// @returns Size, in bytes, of system memory required for an IFence object. virtual size_t GetFenceSize( Result* pResult) const = 0; /// Creates a GPU fence object. /// /// @param [in] createInfo Data controlling the fence properties. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetFenceSize(). /// @param [out] ppFence Constructed fence object. When successful, the returned address will be /// the same as specified in pPlacementAddr. /// /// @returns Success if the fence was successfully created.
Otherwise, one of the following errors may be returned: /// + ErrorInvalidPointer if pPlacementAddr or ppFence is null. virtual Result CreateFence( const FenceCreateInfo& createInfo, void* pPlacementAddr, IFence** ppFence) const = 0; /// Opens a fence which was shared by another Device. /// /// @param [in] openInfo A reference to FenceOpenInfo, the handle is used if it's not null, or the /// event is opened via name. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetFenceSize(). /// @param [out] ppFence Constructed fence object. When successful, the returned address will be /// the same as specified in pPlacementAddr. /// @returns Success if the event was successfully reconstructed, otherwise an appropriate error code. virtual Result OpenFence( const FenceOpenInfo& openInfo, void* pPlacementAddr, IFence** ppFence) const = 0; /// Determines the amount of system memory required for an IGpuEvent object. An allocation of this amount of memory /// must be provided in the pPlacementAddr parameter of CreateGpuEvent(). /// /// @param [in] createInfo Properties of the GPU event object to create. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid /// the additional validation. /// /// @returns Size, in bytes, of system memory required for an IGpuEvent object. virtual size_t GetGpuEventSize( const GpuEventCreateInfo& createInfo, Result* pResult) const = 0; /// Creates a GPU event object. /// /// @param [in] createInfo Properties of the GPU event object to create. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetGpuEventSize(). /// @param [out] ppGpuEvent Constructed event object. When successful, the returned address will be the same as /// specified in pPlacementAddr.
/// /// @returns Success if the event was successfully created. Otherwise, one of the following errors may be returned: /// + ErrorInvalidPointer if pPlacementAddr or ppGpuEvent is null. virtual Result CreateGpuEvent( const GpuEventCreateInfo& createInfo, void* pPlacementAddr, IGpuEvent** ppGpuEvent) = 0; /// Determines the amount of system memory required for a query pool object. An allocation of this amount of memory /// must be provided in the pPlacementAddr parameter of CreateQueryPool(). /// /// @param [in] createInfo Data controlling the query pool, such as what type of queries and how many slots are in /// the pool. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. /// /// @returns Size, in bytes, of system memory required for an IQueryPool object with the specified properties. A /// return value of 0 indicates the createInfo was invalid. virtual size_t GetQueryPoolSize( const QueryPoolCreateInfo& createInfo, Result* pResult) const = 0; /// Creates an @ref IQueryPool object with the requested properties. /// /// @param [in] createInfo Data controlling the query pool, such as what type of queries and how many slots are /// in the pool. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetQueryPoolSize() with the same /// createInfo param. /// @param [out] ppQueryPool Constructed query pool object. When successful, the returned address will be the /// same as specified in pPlacementAddr. /// /// @returns Success if the query pool was successfully created. Otherwise, one of the following errors may be /// returned: /// + ErrorInvalidPointer if pPlacementAddr or ppQueryPool is null. /// + ErrorInvalidValue if createInfo.numSlots is zero.
virtual Result CreateQueryPool( const QueryPoolCreateInfo& createInfo, void* pPlacementAddr, IQueryPool** ppQueryPool) const = 0; /// Determines the amount of system memory required for a command allocator object. An allocation of this amount of /// memory must be provided in the pPlacementAddr parameter of CreateCmdAllocator(). /// /// @param [in] createInfo Command allocator properties including GPU memory allocation sizes. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation. /// /// @returns Size, in bytes, of system memory required for an ICmdAllocator object with the specified properties. /// A return value of 0 indicates the createInfo was invalid. virtual size_t GetCmdAllocatorSize( const CmdAllocatorCreateInfo& createInfo, Result* pResult) const = 0; /// Creates a command allocator object that can allocate GPU memory with the specified properties for use by command /// buffer objects. /// /// @param [in] createInfo Command allocator properties including GPU memory allocation sizes. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetCmdAllocatorSize() with the /// same createInfo param. /// @param [out] ppCmdAllocator Constructed command allocator object. When successful, the returned address will be /// the same as specified in pPlacementAddr. /// /// @returns Success if the command allocator was successfully created. Otherwise, one of the following errors may /// be returned: /// + ErrorInvalidPointer if pPlacementAddr or ppCmdAllocator is null. virtual Result CreateCmdAllocator( const CmdAllocatorCreateInfo& createInfo, void* pPlacementAddr, ICmdAllocator** ppCmdAllocator) = 0; /// Determines the amount of system memory required for a command buffer object.
An allocation of this amount of /// memory must be provided in the pPlacementAddr parameter of CreateCmdBuffer(). /// /// @param [in] createInfo Command buffer properties including the target queue type. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid /// the additional validation. /// /// @returns Size, in bytes, of system memory required for an ICmdBuffer object with the specified properties. A /// return value of 0 indicates the createInfo was invalid. virtual size_t GetCmdBufferSize( const CmdBufferCreateInfo& createInfo, Result* pResult) const = 0; /// Creates a command buffer object that can build work intended for a particular queue type. /// /// @param [in] createInfo Command buffer properties including the target queue type. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetCmdBufferSize() with the same /// createInfo param. /// @param [out] ppCmdBuffer Constructed command buffer object. When successful, the returned address will be /// the same as specified in pPlacementAddr. /// /// @returns Success if the command buffer was successfully created. Otherwise, one of the following errors may be /// returned: /// + ErrorInvalidPointer if pPlacementAddr or ppCmdBuffer is null. virtual Result CreateCmdBuffer( const CmdBufferCreateInfo& createInfo, void* pPlacementAddr, ICmdBuffer** ppCmdBuffer) = 0; /// Determines the amount of system memory required for an indirect command generator object. An allocation of this /// amount must be provided in the pPlacementAddr parameter of CreateIndirectCmdGenerator(). /// /// @param [in] createInfo Indirect command generator properties. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid the /// additional validation steps.
/// /// @returns Size, in bytes, of system memory required for an IIndirectCmdGenerator object with the specified /// properties. A return value of zero indicates the createInfo was invalid. virtual size_t GetIndirectCmdGeneratorSize( const IndirectCmdGeneratorCreateInfo& createInfo, Result* pResult) const = 0; /// Creates an indirect command generator object which can translate an application-specified command buffer into a /// format understandable by the GPU. /// /// @param [in] createInfo Indirect command generator properties. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetIndirectCmdGeneratorSize() with /// the same createInfo param. /// @param [out] ppGenerator Constructed indirect command generator object. When successful, the returned /// address will be the same as specified in pPlacementAddr. /// /// @returns Success if the command generator was successfully created. Otherwise, one of the following errors may /// be returned: /// + ErrorInvalidPointer if pPlacementAddr or ppGenerator is null. virtual Result CreateIndirectCmdGenerator( const IndirectCmdGeneratorCreateInfo& createInfo, void* pPlacementAddr, IIndirectCmdGenerator** ppGenerator) const = 0; /// Determines the amount of system memory required for a perf experiment object. An allocation of this amount of /// memory must be provided in the pPlacementAddr parameter of CreatePerfExperiment(). /// /// @param [in] createInfo Properties of the performance experiment to be created. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid /// the additional validation. /// /// @returns Size, in bytes, of system memory required for an IPerfExperiment object with the specified properties. /// A return value of 0 indicates the createInfo was invalid.
virtual size_t GetPerfExperimentSize( const PerfExperimentCreateInfo& createInfo, Result* pResult) const = 0; /// Creates a performance experiment object that can gather performance counter and trace data for a specific span /// of a command buffer. /// /// @param [in] createInfo Properties of the performance experiment to be created. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetPerfExperimentSize() with the /// same createInfo param. /// @param [out] ppPerfExperiment Constructed performance experiment object. When successful, the returned address /// will be the same as specified in pPlacementAddr. /// /// @returns Success if the perf experiment was successfully created. Otherwise, one of the following errors may be /// returned: /// + ErrorInvalidPointer if pPlacementAddr or ppPerfExperiment is null. virtual Result CreatePerfExperiment( const PerfExperimentCreateInfo& createInfo, void* pPlacementAddr, IPerfExperiment** ppPerfExperiment) const = 0; /// Gets @ref IPrivateScreen objects owned by this device. Private screens are screens not exposed through standard /// OS mechanisms. This function should be called again when any of the private screens are plugged or unplugged. /// The first call to this function enumerates all private screens and stores them in the device object as well. The next /// call triggered by hot-plug event enumerates private screens again but only destroys removed ones and creates /// new private screen objects for newly-added ones. The hash code generated at enumeration time is used as id of /// private screens. If the id of an enumerated private screen already exists, it is treated as unchanged. The EDID /// array and display index are used to generate MD5 hash code.
/// /// @param [out] pNumScreens Pointer to the number of private sceens, note that this number does not mean first /// *pNumScreens elements in ppScreens are valid but just a hint that total *pNumScreens /// out of MaxPrivateScreens are valid. /// @param [out] ppScreens Pointer to the array of private screens. The client must pass in the pointer to an /// array of at least MaxPrivateScreens pointers to IPrivateScreen. /// /// @returns Success if the private screens are correctly retrieved. Otherwise, one of the following errors may be /// returned: /// + ErrorUnavailable if the device does not support private screen functionalities. virtual Result GetPrivateScreens( uint32* pNumScreens, IPrivateScreen** ppScreens) = 0; /// Registers an emulated @ref IPrivateScreen objects owned by this device. An emulated private screen doesn't have /// a physical display hardware connected to the GPU. This could be useful for debugging unusual configurations or /// using in automation systems when no real HMDs are available. /// /// @param [in] createInfo Properties of to create an emulated private screen object. /// @param [out] pTargetId Pointer to returned emulated private screen target id. /// /// @returns Success if the emulated private screen is correctly created. Otherwise, one of the following errors may /// be returned: /// + ErrorTooManyPrivateScreens if the device cannot create an emulated private screen. virtual Result AddEmulatedPrivateScreen( const PrivateScreenCreateInfo& createInfo, uint32* pTargetId) = 0; /// Removes an emulated @ref IPrivateScreen objects owned by this device. /// /// @param [in] targetId Target id of emulated private screen to be removed. /// /// @returns Success if the emulated private screen is correctly removed. Otherwise, one of the following errors may /// be returned: /// + ErrorUnknown if any unknown error occurs. 
virtual Result RemoveEmulatedPrivateScreen(
    uint32 targetId) = 0;

/// Determines the amount of system memory required for a private screen image object (and an associated memory
/// object). Allocations of these amounts of memory must be provided in the pImagePlacementAddr and
/// pGpuMemoryPlacementAddr parameters of CreatePrivateScreenImage().
///
/// Only images created through this interface are valid sources for IPrivateScreen::Present().
///
/// @param [in]  createInfo     Properties of the image to create such as width/height and pixel format.
/// @param [out] pImageSize     Size, in bytes, of system memory required for the IImage. Should be specified to
///                             the pImagePlacementAddr argument of CreatePrivateScreenImage().
/// @param [out] pGpuMemorySize Size, in bytes, of system memory required for a dummy IGpuMemory object attached to
///                             the private screen IImage. Should be specified to the pGpuMemoryPlacementAddr
///                             argument of CreatePrivateScreenImage().
/// @param [out] pResult        The validation result if pResult is non-null. This argument can be null to avoid
///                             the additional validation.
virtual void GetPrivateScreenImageSizes(
    const PrivateScreenImageCreateInfo& createInfo,
    size_t*                             pImageSize,
    size_t*                             pGpuMemorySize,
    Result*                             pResult) const = 0;

/// Creates a private screen presentable image. A private screen presentable image is similar to a regular
/// presentable image but can only be presented on the private screens. It has some implicit properties relative to
/// standard images, such as mipLevels=1, arraySize=1, numSamples=1, etc. It also requires its bound GPU memory to be
/// pinned before presenting.
///
/// @param [in]  createInfo              Create info.
/// @param [in]  pImagePlacementAddr     Pointer to the location where PAL should construct this object. There must
///                                      be as much size available here as reported by calling
///                                      GetPrivateScreenImageSizes().
/// @param [in] pGpuMemoryPlacementAddr Pointer to the location where PAL should construct a IGpuMemory associated /// with this peer image. There must be as much size available here as /// reported by calling GetPrivateScreenImageSizes(). /// @param [out] ppImage Constructed image object. /// @param [out] ppGpuMemory Constructed dummy memory object. This object is only valid for specifying /// in a memory reference list. /// /// @returns Success if the image was successfully created. Otherwise, one of the following errors may be returned: /// + ErrorPrivateScreenInvalidFormat if the format isn't supported on the private screen. /// + ErrorPrivateScreenRemoved if the private screen was removed. virtual Result CreatePrivateScreenImage( const PrivateScreenImageCreateInfo& createInfo, void* pImagePlacementAddr, void* pGpuMemoryPlacementAddr, IImage** ppImage, IGpuMemory** ppGpuMemory) = 0; /// Determines the amount of system memory required for an ISwapChain object. An allocation of this amount of memory /// must be provided in the pPlacementAddr parameter of CreateSwapChain(). /// /// @param [in] createInfo All the information related with this swap chain. /// @param [out] pResult The validation result if pResult is non-null. This argument can be null to avoid /// the additional validation. /// /// @returns Size, in bytes, of system memory required for an ISwapChain object. virtual size_t GetSwapChainSize( const SwapChainCreateInfo& createInfo, Result* pResult) const = 0; /// Create swap chain object based on the local window system. It doesn't include creation of presentable images. /// The presentable images should be associated with SwapChain object when presentable image is created. /// /// @param [in] createInfo All the information related with this swap chain. /// @param [in] pPlacementAddr Pointer to the location where PAL should construct this object. There must be as /// much size available here as reported by calling GetSwapChainSize(). 
/// @param [out] ppSwapChain Constructed swapchain object. When successful, the returned address will be the /// same as specified in pPlacementAddr. /// /// @returns Success if create swap chain instance successfully. Otherwise, one of the following errors may be /// returned: /// + ErrorUnknown if an unexpected internal error occurs. virtual Result CreateSwapChain( const SwapChainCreateInfo& createInfo, void* pPlacementAddr, ISwapChain** ppSwapChain) = 0; /// Sets a power profile for this device. /// /// @param [in] profile A profile is a pre-defined configuration indicates how KMD/PPLib is notified to work, /// e.g. raise or lower the GPU clock etc. /// @param [in,out] pInfo Custom power profile info needed for VrCustom mode, can be null for other modes. Note the /// actualSwitchInfo[] field is output part of @ref CustomPowerProfile. /// /// @returns Success if the profile is set successfully. Otherwise, one of the following errors may be /// returned: /// + ErrorUnavailable if this function is not available on this OS. /// + ErrorUnknown if an unexpected internal error occurs. virtual Result SetPowerProfile( PowerProfile profile, CustomPowerProfile* pInfo) = 0; /// Sends an escape call to the KMD to enable power optimizations for DirectML/ROCm workloads. The client must call /// this function when the context for any ROCm or DirectML workload is created or destroyed. /// /// @param [in] enableOptimization Set to true if called during context creation (enable power optimizations), /// false if context destroyed (restore defaults). /// /// @returns Success if the power optimization is set successfully. Otherwise, one of the following errors may be /// returned: /// + ErrorUnavailable if this function is not available on this OS. /// + ErrorUnknown if an unexpected internal error occurs. virtual Result SetMlPowerOptimization( bool enableOptimization) const = 0; /// Queries workstation caps on this device. 
/// /// @param [out] pCaps Pointer to location where pal should write back workstation caps. /// /// @returns Success if wokstation caps is got from KMD successfully. Otherwise, one of the following errors may be /// returned: /// + ErrorOutOfMemory if out of system memory. virtual Result QueryWorkStationCaps( WorkStationCaps* pCaps) const = 0; /// Queries display connectors installed on the GPU /// /// @param [in,out] pConnectorCount Input value specifies the maximum number of connectors to enumerate, and the /// output value specifies the total number of display modes that were enumerated /// in pConnectors. The input value is ignored if pConnectors is null. /// This pointer must not be null. /// @param [out] pConnectors Output list of connectors. Can be null, in which case the total number of /// available connectors will be written to pConnectorCount. /// /// /// @returns Success if the profile is set successfully. Otherwise, one of the following errors may be /// returned: /// + ErrorUnavailable if this function is not available on this OS. /// + ErrorUnknown if an unexpected internal error occurs. virtual Result QueryDisplayConnectors( uint32* pConnectorCount, DisplayConnectorProperties* pConnectors) = 0; /// Queries the dispatch kernel source code /// /// @returns Returns the source code for the kernel dispatch function used in /// the kernel scheduler for device enqueue (CL C code) or nullptr if it's not available. /// /// @note This function is to support OpenCL AQL submissions. virtual const char* GetDispatchKernelSource() const = 0; /// @} /// Query the Flgl state from the device. Device will query the connectivity of GLSync card and return /// the state. Pal internal state of Flgl will be updated. /// /// @param [out] pState Pointer to the location that PAL should write the internal flgl states back. /// /// @returns Success if query returns with success. 
///          Otherwise, one of the following errors may be returned:
///          + ErrorOutOfMemory if out of system memory.
///          + ErrorUnknown if an unexpected internal error occurs.
virtual Result FlglQueryState(
    FlglState* pState) = 0;

/// Set the Flgl config of the device.
///
/// @param [in] glSyncConfig const reference to the config struct.
///
/// @returns Success if setting returns with success. Otherwise, one of the following errors may be returned:
///          + ErrorUnknown if an unexpected internal error occurs.
///          + ErrorUnsupported if this GenLock function is not available.
virtual Result FlglSetSyncConfiguration(
    const GlSyncConfig& glSyncConfig) = 0;

/// Get the Flgl config of the device.
/// This function cannot be called if FlglState's support value is FlglSupport::NotAvailable.
///
/// @param [out] pGlSyncConfig Pointer to the location that PAL should write the config back.
///
/// @returns Success if query returns with success. Otherwise, one of the following errors may be returned:
///          + ErrorUnknown if an unexpected internal error occurs.
///          + ErrorInvalidPointer if pGlSyncConfig is a null pointer.
virtual Result FlglGetSyncConfiguration(
    GlSyncConfig* pGlSyncConfig) const = 0;

/// Set the Framelock to disable or enable. Client should call this interface first to enable/disable Flgl.
/// This function cannot be called if FlglState's support value is FlglSupport::NotAvailable.
///
/// @param [in] enable If true enables KMD framelock, otherwise disables framelock.
///
/// @returns Success if framelock was enabled/disabled successfully. Otherwise, one of the following errors may be
///          returned:
///          + ErrorUnavailable if this function is not supported on this Asic.
///          + ErrorUnknown if an unexpected internal error occurs.
virtual Result FlglSetFrameLock(
    bool enable) = 0;

/// Set the Genlock to disable or enable.
/// This function cannot be called if FlglState's support value is FlglSupport::NotAvailable.
/// /// @param [in] enable If true enables the genlock, otherwise disables genlock. /// /// @returns Success if genlock enable/disable successfully. Otherwise, one of the following errors may be /// returned: /// + ErrorUnavailable if this function is not supported on this Asic. /// + ErrorUnknown if an unexpected internal error occurs. virtual Result FlglSetGenLock( bool enable) = 0; /// Reset the framelock HW counter. The following counter operations are directly submit to hardware via I2C /// interface Pal doesn't store the counter internally. Client should manage the counter /// /// @returns Success if the HW counter is reset successfully. Otherwise, one of the following errors may be /// returned: /// + ErrorUnavailable if this function is not available on this Asic. /// + ErrorUnknown if an unexpected internal error occurs. virtual Result FlglResetFrameCounter() const = 0; /// Check if a reset/discontinuity of HW framecounter occurs. If returns false, there is no need to query HW frame /// counter, client should update its software counter instead. If returns true, client is required to query HW /// counter and adjusts its software counter accordingly. /// /// @param [out] pReset Pointer to the location that PAL should write the reset status back. /// /// @returns Success if the reset status is returned. Otherwise, one of the following errors may be returned: /// + ErrorUnavailable if this function is not available on this Asic. /// + ErrorUnknown if an unexpected internal error occurs. virtual Result FlglGetFrameCounterResetStatus( bool* pReset) const = 0; /// Get the framelock HW counter. /// /// @param [out] pValue Pointer to the location that PAL should write the frame counter value back. /// @param [out] pReset Pointer to the location that PAL should write the frame counter reset state. /// /// @returns Success if the frame counter is returned. 
///          Otherwise, one of the following errors may be returned:
///          + ErrorUnavailable if this function is not available on this Asic.
///          + ErrorUnknown if an unexpected internal error occurs.
virtual Result FlglGetFrameCounter(
    uint64* pValue,
    bool*   pReset) const = 0;

/// Checks if the specified externally-controlled feature settings have changed since the last time the function was
/// called.
///
/// This is intended to be a lightweight function that can be called per frame per feature. If the function
/// returns Result::Success and (*pRsFeaturesChanged & RsFeatureTypeXX) != 0, then the user changed some related
/// settings in the UI.
///
/// If TurboSync has updated, the client should first try to re-read the application profile settings by calling
/// IPlatform::QueryRawApplicationProfile() with client = User3D. If that returns Unsupported, then fall back
/// to device-wide TurboSync settings read via GetRsFeatureGlobalSettings().
///
/// If Chill has updated, call IPlatform::QueryRawApplicationProfile() with client = Chill to re-read the
/// system app profiles and then with client = User3D for any per-user Chill overrides, and additionally
/// call GetRsFeatureGlobalSettings() to get the Chill enabled state.
///
/// If Delag has updated, call IPlatform::QueryRawApplicationProfile() with client = User3D to get the enabled
/// state, and additionally call GetRsFeatureGlobalSettings() to get the Delag hotkey.
///
/// @param [in]  rsFeatures         Bitmask of RsFeatureType value(s) to query. Use UINT_MAX to poll all.
/// @param [out] pRsFeaturesChanged Bitmask of queried RsFeatureTypes that have changed since last polling.
///
/// @returns Success if the call succeeded.
virtual Result DidRsFeatureSettingsChange(
    uint32  rsFeatures,
    uint32* pRsFeaturesChanged) = 0;

/// Gets externally-controlled per-device settings for the requested RsFeatureType.
///
/// @param [in]  rsFeature      Feature type to request information for (singular, not a mask).
/// @param [out] pRsFeatureInfo Settings related to the specified RsFeatureType. /// /// @returns Success if the call succeeded. virtual Result GetRsFeatureGlobalSettings( RsFeatureType rsFeature, RsFeatureInfo* pRsFeatureInfo) = 0; /// Update Chill Status (last active time stamp). After every frame, UMD needs to generate a time stamp and inform /// KMD through the shared memory, if the time stamp changes between 2 frames, it means Chill is active and KMD /// needs to adjust power through PSM. /// /// @param [in] lastChillActiveTimeStampUs the last Chill active time stamp in microseconds to set /// /// @returns Success if the call succeeded. virtual Result UpdateChillStatus( uint64 lastChillActiveTimeStampUs) = 0; /// Make the Bus Addressable allocations available to be accessed by remote device. /// Exposes the surface and marker bus addresses for each allocation. These bus addresses can be accessed by /// calling @ref IGpuMemory::Desc() on the appropriate object. /// Client drivers must call @ref AddGpuMemoryReferences() for all relevant allocations before calling this. /// /// @param [in] pQueue Queue used by PAL for performing this operation. /// @param [in] gpuMemCount Number of GPU memory allocations to expose to remote devices. /// @param [in] ppGpuMemList Array of gpuMemCount IGpuMemory objects. /// /// @returns Success if bus addresses are available by calling @ref IGpuMemory::Desc() on all IGpuMemory objects virtual Result InitBusAddressableGpuMemory( IQueue* pQueue, uint32 gpuMemCount, IGpuMemory*const* ppGpuMemList) = 0; /// Create virtual display. Virtual display is similar to the regular display (IScreen), the difference is the /// virtual display doesn't have a physical monitor connected. When CreateVirtualDisplay is called, KMD will /// generate a hot-plug-in event to notify application a new display is added. 
/// Then the app/client will call PAL to re-query the attached screens and will find a new one in the list that is
/// not backed by a physical monitor, but they can use it just like a normal display.
///
/// @param [in]  virtualDisplayInfo Virtual display creation information.
/// @param [out] pScreenTargetId    The screen target ID returned by KMD.
///
/// @returns Success if the call succeeded.
virtual Result CreateVirtualDisplay(
    const VirtualDisplayInfo& virtualDisplayInfo,
    uint32*                   pScreenTargetId) = 0;

/// Destroy virtual display. When DestroyVirtualDisplay is called KMD will generate a hot-plug-out event to notify
/// application a virtual display is removed. It will also be removed from the display list, and the app/client can't
/// use it anymore.
///
/// @param [in] screenTargetId Screen target ID.
///
/// @returns Success if the call succeeded.
virtual Result DestroyVirtualDisplay(
    uint32 screenTargetId) = 0;

/// Query virtual display properties from screen target ID.
///
/// @param [in]  screenTargetId Screen target ID.
/// @param [out] pProperties    A pointer to VirtualDisplayProperties.
///
/// @returns Success if the call succeeded.
virtual Result GetVirtualDisplayProperties(
    uint32                    screenTargetId,
    VirtualDisplayProperties* pProperties) = 0;

/// Determines if hardware accelerated stereo rendering can be enabled for the given graphics pipeline.
/// If hardware accelerated stereo rendering can be enabled, client doesn't need to do shader patching,
/// which includes translating view id intrinsic to user data slot, outputting render target
/// array index and viewport array index in shader closest to scan converter.
///
/// @param [in] viewInstancingInfo Graphics pipeline view instancing information.
///
/// @returns True if hardware accelerated stereo rendering can be enabled, False otherwise.
virtual bool DetermineHwStereoRenderingSupported(
    const GraphicPipelineViewInstancingInfo& viewInstancingInfo) const = 0;

/// Get the UTF-8 file path used to put all files for cache purpose.
/// On Windows, the returned path depends on Util::GetProcessIntegrityLevel and Util::IsProcessInAppContainer. /// /// @returns Pointer to cache file path. virtual const char* GetCacheFilePath() const = 0; /// Get the UTF-8 file path used to put all files for debug purpose (such as logs, dumps, replace shader). /// /// @returns Pointer to debug file path. virtual const char* GetDebugFilePath() const = 0; /// Queries the base driver Radeon Software Version string (as shown in Radeon Settings). /// /// @param [out] pBuffer A non-null pointer to the buffer where the string will be written. /// @param [in] bufferLength The byte size of the string buffer (must be non-zero). /// /// @returns Success if the string was successfully retrieved. Otherwise, one of the following errors /// may be returned: /// + Unsupported if this function is not available on this environment. /// + NotFound if the Radeon Software Version string is not present. /// + ErrorInvalidValue if nullptr was passed for pBuffer or 0 for bufferLength. virtual Result QueryRadeonSoftwareVersion( char* pBuffer, size_t bufferLength) const = 0; /// Queries the base Driver Release Version string. /// /// @param [out] pBuffer A non-null pointer to the buffer where the string will be written. /// @param [in] bufferLength The byte size of the string buffer (must be non-zero). /// /// @returns Success if the string was successfully retrieved. Otherwise, one of the following errors /// may be returned: /// + Unsupported if this function is not available on this environment. /// + NotFound if the Release Version string is not present. /// + ErrorInvalidValue if nullptr was passed for pBuffer or 0 for bufferLength. virtual Result QueryReleaseVersion( char* pBuffer, size_t bufferLength) const = 0; /// Returns the value of the associated arbitrary client data pointer. /// Can be used to associate arbitrary data with a particular PAL object. /// /// @returns Pointer to client data. 
void* GetClientData() const
{
    return m_pClientData;
}

/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData A pointer to arbitrary client data.
void SetClientData(
    void* pClientData)
{
    m_pClientData = pClientData;
}

#if defined(__unix__)
/// Gets the modifier list and modifier count according to image format.
///
/// @param [in]  format         Image format.
/// @param [out] pModifierCount Modifier count.
/// @param [out] pModifiersList A pointer to the modifier list.
virtual void GetModifiersList(
    ChNumFormat format,
    uint32*     pModifierCount,
    uint64*     pModifiersList) const = 0;
#endif

/// Passes the HIP runtime state setup to KMD on behalf of the HIP runtime.
///
/// @param [in] runtimeState A structure containing runtime state information to pass to KMD.
///
/// @returns Result for error handling.
virtual Result RegisterHipRuntimeState(const HipRuntimeSetup& runtimeState) const = 0;

/// Sets the second-level trap handler for HIP.
///
/// @param [in] pTrapHandlerCode   A pointer to the piece of memory containing the trap handler code.
///                                This may be nullptr, which indicates that there is no secondary trap handler.
/// @param [in] codeOffset         An offset, in bytes, into the pTrapHandlerCode's memory region.
/// @param [in] pTrapHandlerMemory A pointer to the piece of memory containing the trap handler's memory.
///                                This may be nullptr, which indicates that there is no valid trap handler
///                                memory.
/// @param [in] memoryOffset       An offset, in bytes, into the pTrapHandlerMemory's memory region.
///
/// @returns Result for error handling.
virtual Result SetHipTrapHandler(
    const IGpuMemory* pTrapHandlerCode,
    gpusize           codeOffset,
    const IGpuMemory* pTrapHandlerMemory,
    gpusize           memoryOffset) const = 0;

/// Sets the CmdDisassembly::ICmdBufferReporting associated with this device.
/// Currently only one can be registered at a time - but this could be changed in the future.
/// The default implementation provided by this interface returns Result::Unsupported.
///
/// @param [in] pInterface A pointer to the CmdDisassembly::ICmdBufferReporting being
///                        registered
/// @return Result::Success if successful
///         Result::Unsupported if feature is not supported
///         Result::AlreadyExists if there is already a CmdDisassembly::ICmdBufferReporting
///                               registered with this device
///         Result::ErrorInvalidValue if pInterface == nullptr
///
virtual Result RegisterCmdReportingInterface(
    CmdDisassembly::ICmdBufferReporting* pInterface)
{
    return Result::Unsupported;
}

/// Resets the CmdDisassembly::ICmdBufferReporting associated with this device to nullptr.
/// Currently only one can be registered at a time - but this could be changed in the future.
/// The default implementation provided by this interface returns Result::Unsupported.
///
/// @param [in] pInterface A pointer to the CmdDisassembly::ICmdBufferReporting that
///                        was registered
/// @return Result::Success if successful
///         Result::Unsupported if feature is not supported
///         Result::ErrorInvalidValue if pInterface == nullptr or pInterface was not the last
///                                   CmdDisassembly::ICmdBufferReporting registered
///
virtual Result UnregisterCmdReportingInterface(
    CmdDisassembly::ICmdBufferReporting* pInterface)
{
    return Result::Unsupported;
}

protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method.
IDevice() : m_pClientData(nullptr) {}

/// @internal Destructor. Prevent use of delete operator on this interface. Device objects will be destroyed when
/// @ref IPlatform::Destroy() is called.
virtual ~IDevice() { }

DeviceInterfacePfnTable m_pfnTable; ///< SRD function pointer table.

private:
/// @internal Client data pointer.
/// This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};

/**
 ***********************************************************************************************************************
 * @defgroup ResourceBinding Resource Binding Model
 *
 * _Resource binding_ refers to the process of binding resources (textures, UAVs, samplers, etc.) for access by shaders
 * in a pipeline. This is an area where 3D APIs diverge significantly. PAL's model is designed to minimally abstract
 * the underlying hardware in a way that allows performant implementations by each client driver.
 *
 * ### Hardware User Data
 * GCN hardware has 16 _user data_ registers that act as a generic interface for passing values from a command buffer to
 * a shader. User data registers are set to their desired value via packets in a command buffer, then the specified
 * values are loaded from the user data registers into shader GPRs when a wave is launched. Since the user data is just
 * arbitrary generic data, this method can be used to pass any type of data a client may want to specify directly from
 * a command buffer, for example:
 *
 * - __Constant value__ - a 32-bit floating point or integer constant could be written into user data then be used
 *   directly by the shader.
 * - __Shader resource descriptor (SRD)__ - 4 or 8 dwords of consecutive data could be an SRD which will be used as a
 *   t#, s#, etc. by the shader.
 * - __Pointer__ - The user data could be an arbitrary GPU virtual address where a table of constants, SRDs, etc. are
 *   stored.
 *
 * The 3D driver and shader compiler are responsible for working together to define how resources referenced in a shader
 * should be mapped to user data bound in a command buffer.
 *
 * ### PAL User Data
 * PAL only lightly abstracts the hardware user data concept.
 * DeviceProperties reports the number of user data entries
 * supported on the device in maxUserDataEntries. Note that some clients may require more user data entries than there
 * are physical user data registers - PAL will manage "spilling" of user data entries to GPU memory if necessary.
 *
 * User data entries are set in a command buffer by calling ICmdBuffer::CmdSetUserData().
 *
 * ### Shader User Data Mapping
 * When creating a pipeline, the client must specify how the user data entries set in a command buffer map to resources
 * referenced by each shader in the pipeline. This is done in the pUserDataNodes array of PipelineShaderInfo.
 *
 * The resource mapping is built as a graph of _resource mapping nodes_ where the root nodes in the graph correspond
 * to the user data entries. Each node fits in one of the following categories:
 *
 * - __SRD__: A 4 or 8 dword descriptor describing a shader resource. The mapping specifies the type and slot the SRD
 *   corresponds to (e.g., UAV 3 or sampler 7).
 * - __Descriptor table pointer__: A GPU virtual address pointing at an array of other nodes. Typically this will be a
 *   pointer to GPU memory containing just SRDs, but tables are free to be built hierarchically such that tables have
 *   pointers to other tables in them.
 * - __Inline constants__: 32-bit constants loaded directly by the shader. The mapping specifies the CB slot that
 *   should load the constant (e.g., cb3[1]).
 * - __Unused__: A particular shader may not use all entries in a user data layout, and those should be marked unused.
 *
 * The following image illustrates a simple user data mapping:
 *
 * @image html userDataMapping.png
 *
 * ### Building Descriptor Tables
 * The client is responsible for building and specifying SRDs and pointers to GPU memory in order to execute the shader
 * resource mapping specified during pipeline creation.
 * SRDs can be created with several methods provided by IDevice:
 *
 * - CreateTypedBufferViewSrds()
 * - CreateUntypedBufferViewSrds()
 * - CreateImageViewSrds()
 * - CreateFmaskViewSrds()
 * - CreateSamplerSrds()
 *
 * The size required for each of these SRD types is returned in the srdSizes structure in DeviceProperties.
 *
 * When building descriptor tables in GPU memory, the client will need to retrieve a virtual address of the GPU memory
 * where the tables exist in order to reference them from user data or from other descriptor tables. IGpuMemory
 * provides the GetVirtAddr() method for this purpose.
 ***********************************************************************************************************************
 */

} // Pal