/* *********************************************************************************************************************** * * Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * **********************************************************************************************************************/ /** *********************************************************************************************************************** * @file palQueue.h * @brief Defines the Platform Abstraction Library (PAL) IQueue interface and related types. *********************************************************************************************************************** */ #pragma once #include "pal.h" #include "palDestroyable.h" #include "palEvent.h" namespace Pal { // Forward declarations. 
class ICmdBuffer;
class IFence;
class IGpuMemory;
class IImage;
class IPrivateScreen;
class IQueueSemaphore;
class IScreen;
class ISwapChain;
struct CmdBufInfo;
struct GpuMemSubAllocInfo;
struct GpuMemoryRef;
struct DoppRef;
enum class VirtualGpuMemAccessMode : uint32;

/// Specifies whether presents are windowed or fullscreen.  This will determine whether the present is performed via a
/// BLT or flip.
enum class PresentMode : uint32
{
    Unknown,
    Windowed,
    Fullscreen,
    Count       ///< Trailing enumerator; by position it evaluates to 3, the number of enumerators before it.
};

/// Enumerates the possible overrides for the flip interval.
enum class FlipIntervalOverride : uint32
{
    _None                 = 0,  ///< No override.
    Immediate             = 1,  ///< Zero frames of flip latency.
    ImmediateAllowTearing = 2,  ///< Same as Immediate, but allows tearing (no vsync).
    One                   = 3,  ///< One frame of flip latency.
    Two                   = 4,  ///< Two frames of flip latency.
    Three                 = 5,  ///< Three frames of flip latency.
    Four                  = 6,  ///< Four frames of flip latency.
};

/// Defines flags for describing which types of present modes are supported on a given queue.  Each enumerator is a
/// distinct single bit, so values can be combined in a uint32 mask.
enum PresentModeSupport : uint32
{
    SupportWindowedPresent          = 0x1,
    SupportWindowedPriorBlitPresent = 0x2,
    SupportFullscreenPresent        = 0x4,
};

/// Defines submit-time bottlenecks which PAL can potentially optimize.
enum class SubmitOptMode : uint32
{
    Default           = 0,  ///< PAL will enable optimizations when generally efficient.
    Disabled          = 1,  ///< Disable all optimizations that could be detrimental in special cases.
    MinKernelSubmits  = 2,  ///< Minimize the overhead of launching command buffers on the CPU and GPU.
    MinGpuCmdOverhead = 3,  ///< Minimize the overhead of reading command buffer commands on the GPU.
    Count                   ///< Trailing enumerator; by position it evaluates to 4.
};

/// Enumerates vcn instance affinity statuses
enum MmAffinityStatus : uint32
{
    MmAffinityNotAllowed = 0,  ///< The specific vcn instance can't be used.
    MmAffinityAllowed    = 1   ///< The specific vcn instance can be used.
};

/// Union describes all vcn instance affinity status.
union MmAffinity
{
    struct
    {
        uint32 vcn0Affinity : 2;   ///< Affinity for instance vcn0
        uint32 vcn1Affinity : 2;   ///< Affinity for instance vcn1
        uint32 reserved     : 28;  ///< Reserved (all 0)
    };
    uint32 u32All;                 ///< All of the bit-fields above viewed as a single 32-bit uint.
};

/// Structure describing dump information for a command buffer.
struct CmdBufferDumpDesc
{
    EngineType    engineType;     ///< The engine type that this buffer is targeted for.
    QueueType     queueType;      ///< The type of queue that this buffer is being created on.
    SubEngineType subEngineType;  ///< The ID of which sub-engine that this buffer is made for.
    uint32        cmdBufferIdx;   ///< The index into the submit's ppCmdBuffers array (see
                                  ///  @ref PerSubQueueSubmitInfo) that this command buffer dump came from.
    union
    {
        struct
        {
            uint8 isPreamble  : 1;  ///< Set if the buffer is an internal preamble command buffer.
            uint8 isPostamble : 1;  ///< Set if the buffer is an internal postamble command buffer.
            uint8 reserved    : 6;  ///< Reserved for future use.
        };
        uint8 u32All;               ///< Flags packed as 8-bit uint.  Note that, despite the "u32" in its name,
                                    ///  this member is declared as uint8 and therefore only holds 8 bits.
    } flags;
};

/// Structure describing a command buffer chunk for use while dumping command buffers.
struct CmdBufferChunkDumpDesc
{
    uint32      id;         ///< ID (number) of this command chunk within the command buffer.
    const void* pCommands;  ///< Pointer to the command data.
    size_t      size;       ///< Size of valid data in bytes pointed to in pCommands.
};

/// Definition for command buffer dumping callback.
///
/// @param [in] cmdBufferDesc  Description of the command buffer.
/// @param [in] pChunks        Pointer to an array of command buffer chunk descriptions.
/// @param [in] numChunks      The number of chunks pointed to in pChunks.
/// @param [in] pUserData      Client-provided data pointer handed to the callback (see the pUserData member of
///                            @ref MultiSubmitInfo).
typedef void (PAL_STDCALL* CmdDumpCallback)(
    const CmdBufferDumpDesc&      cmdBufferDesc,
    const CmdBufferChunkDumpDesc* pChunks,
    uint32                        numChunks,
    void*                         pUserData);

/// Specifies properties for @ref IQueue creation.  Input structure to IDevice::CreateQueue().
struct QueueCreateInfo
{
    QueueType     queueType;      ///< Selects which type of queue to create.
    EngineType    engineType;     ///< Selects which type of engine to create.
    uint32        engineIndex;    ///< Which instance of the specified engine type to query.  For example, there
                                  ///  can be multiple compute queues, so this parameter distinguishes between them.
    SubmitOptMode submitOptMode;  ///< A hint telling PAL which submit-time bottlenecks should be optimized, if any.
    QueuePriority priority;       ///< A hint telling PAL to create queue with proper priority.
                                  ///  It is only supported if supportQueuePriority is set in DeviceProperties.
                                  ///  In Linux, if we don't have root privilege, the creation with above-Medium
                                  ///  priority will fail.  Client should take the corresponding action like retry
                                  ///  with lower priority, if necessary.
    struct
    {
        uint32 aqlQueue                  : 1;  ///< Compute queue will process AQL packets and kernels
        uint32 windowedPriorBlit         : 1;  ///< All windowed presents on this queue are notifications
                                               ///  that the client has manually done a blit present
        uint32 tmzOnly                   : 1;  ///< This queue allows only TMZ submissions.  Required for
                                               ///  compute TMZ submits.
#if PAL_AMDGPU_BUILD
        uint32 enableGpuMemoryPriorities : 1;  ///< Enables support for GPU memory priorities on this Queue.
                                               ///  This is optional because enabling the feature requires
                                               ///  a small amount of memory overhead per-Queue for
                                               ///  bookkeeping purposes.
#else
        uint32 placeholder2              : 1;  ///< Reserved field.  Set to 0.
#endif
        uint32 dispatchTunneling         : 1;  ///< This queue uses compute dispatch tunneling.
        uint32 forceWaitIdleOnRingResize : 1;  ///< This queue needs to wait for idle before resize RingSet.
                                               ///  This is intended as a workaround for misbehaving applications.
#if defined(_WIN32)
        uint32 nullRendering             : 1;  ///< Setting this bit makes this queue behave like IfhModeKmd.
#else
        uint32 placeholder3              : 1;  ///< Reserved field.  Set to 0.
#endif
        uint32 reserved                  : 25; ///< Reserved for future use.
    };

    uint32 numReservedCu;          ///< The number of reserved compute units for RT CU queue

#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 914
    uint32 persistentCeRamOffset;  ///< Byte offset to the beginning of the region of CE RAM which this Queue should
                                   ///  preserve across consecutive submissions.  Must be a multiple of 32.  It is an
                                   ///  error to specify a nonzero value here if the Device does not support
                                   ///  @ref supportPersistentCeRam for the Engine this Queue will attach to.
    uint32 persistentCeRamSize;    ///< Amount of CE RAM space which this Queue should preserve across consecutive
                                   ///  submissions.  Units are in DWORDs, and this must be a multiple of 8.  It is an
                                   ///  error to specify a nonzero value here if the Device does not support
                                   ///  @ref supportPersistentCeRam for the Engine this Queue will attach to.
#endif

    uintptr_t aqlPacketList;       ///< Location of the HIP runtime's info about this queue
};

/// Specifies the portion of @ref SubmitInfo that is specific to each sub-queue in a multi-queue object (@see
/// IDevice::CreateMultiQueue).  Effectively, this enables specifying a different set of command buffers for each
/// queue that makes up a gang submission to a multi-queue object.
struct PerSubQueueSubmitInfo
{
    uint32            cmdBufferCount;   ///< Number of command buffers to be submitted (can be 0 if this submit doesn't
                                        ///  involve work for the relevant queue).
    ICmdBuffer*const* ppCmdBuffers;     ///< Array of cmdBufferCount command buffers to be submitted.  Command buffers
                                        ///  that are part of a ganged submit must guarantee the conditions required
                                        ///  for the optimizeExclusiveSubmit flag.
    const CmdBufInfo* pCmdBufInfoList;  ///< Null, or an array of cmdBufferCount structs providing additional
                                        ///  info about the command buffers being submitted.  If non-null,
                                        ///  elements are ignored if their isValid flag is false.
};

/// Specifies all information needed to execute a set of command buffers.  Input structure to IQueue::Submit().
///
/// Some members of this structure are not supported on all platforms.  The client must check the appropriate properties
/// structures to determine if the corresponding features are supported:
/// + pGpuMemoryRefs:    Support is indicated by supportPerSubmitMemRefs in @ref DeviceProperties.
/// + ppBlockIfFlipping: Support is indicated by supportBlockIfFlipping in @ref PlatformProperties.  If it is supported,
///                      the client must not specify a blockIfFlippingCount greater than MaxBlockIfFlippingCount.
///
/// @note If this queue is running in physical submission mode (due to hardware restrictions), the gpuMemRefCount and
///       pGpuMemoryRefs arguments to this method are ignored because the command buffers themselves contain their own
///       GPU memory reference lists.
struct MultiSubmitInfo
{
    const PerSubQueueSubmitInfo* pPerSubQueueInfo;  ///< Specifies per-subqueue information for the submit.  Typically
                                                    ///  this is a pointer to a single entry specifying the command
                                                    ///  buffers to be submitted on this queue.  For gang submission on
                                                    ///  a multi-queue, this should be an array with one entry per
                                                    ///  sub-queue.  The array size must be less than or equal to the
                                                    ///  queueCount specified when the multi-queue was created and
                                                    ///  the workload specified in each entry will be assigned to the
                                                    ///  corresponding sub-queue.  It is valid to have a cmdBufferCount
                                                    ///  of 0 for sub-queues without work.  Can be null if
                                                    ///  perSubQueueInfoCount is 0.
    uint32 perSubQueueInfoCount;           ///< Number of PerSubQueueSubmitInfo to be submitted.  Can be zero if
                                           ///  there is no work to submit.
    uint32 gpuMemRefCount;                 ///< Number of GPU memory references for this submit.
    const GpuMemoryRef* pGpuMemoryRefs;    ///< Array of gpuMemRefCount GPU memory references.  Can be null if
                                           ///  gpuMemRefCount is zero.  The GPU memory objects will be made
                                           ///  resident for the duration of this submit.
    uint32 doppRefCount;                   ///< Number of DOPP desktop texture references for this submit.
    const DoppRef* pDoppRefs;              ///< Array of doppRefCount DOPP texture references.  Can be null if
                                           ///  doppRefCount is zero.
    uint32 externPhysMemCount;             ///< Number of entries in ppExternPhysMem.
    const IGpuMemory** ppExternPhysMem;    ///< Array of external physical memory allocations to be initialized
                                           ///  as part of this submit.  The first submit that references a
                                           ///  particular external physical memory allocation must include
                                           ///  that allocation in this list.  Subsequent submits that reference
                                           ///  the same allocation should not include it in this list, as it
                                           ///  would trigger redundant GPU page table initialization.
    uint32 blockIfFlippingCount;           ///< Number of GPU memory objects to protect when flipped.
    const IGpuMemory*const* ppBlockIfFlipping;  ///< Array of blockIfFlippingCount GPU memory objects.  Can be null if
                                           ///  blockIfFlippingCount is zero.  The command buffers will not be
                                           ///  scheduled to the GPU while a fullscreen (flip) present is queued
                                           ///  for any of these GPU memory allocations.
    uint32 fenceCount;                     ///< Number of fence objects to be signaled once the last command buffer
                                           ///  in this submission completes execution.
    IFence** ppFences;                     ///< Array of fence objects.  Can be null if fenceCount is zero.
    CmdDumpCallback pfnCmdDumpCb;          ///< Null, or a callback function to handle the dumping of the
                                           ///  command buffers used in this submit.
    void* pUserData;                       ///< Client provided data to be passed to callback.
    uint32 stackSizeInDwords;              ///< 0, or the max of stack frame size for indirect shaders of the
                                           ///  pipelines referenced in the command buffers of this submission.
                                           ///  The size is per native thread.  So that the client will have to
                                           ///  multiply by 2 if a Wave64 shader that needs scratch is used.
                                           ///  Note that the size will not shrink for the lifetime of the queue
                                           ///  once it is grown and only affects compute scratch ring.
    const IGpuMemory* pFreeMuxMemory;      ///< The gpu memory object of the private flip primary surface for the
                                           ///  FreeMux feature.
};

/// SubmitInfo is an alias of @ref MultiSubmitInfo.
typedef MultiSubmitInfo SubmitInfo;

/// The value of blockIfFlippingCount in @ref SubmitInfo cannot be greater than this value.
constexpr uint32 MaxBlockIfFlippingCount = 16;

/// Specifies properties for the presentation of an image to the screen.  Input structure to IQueue::PresentDirect().
struct PresentDirectInfo
{
    union
    {
        struct
        {
            uint32 fullscreenDoNotWait : 1;   ///< Fail the present immediately if the present queue is full.
            uint32 srcIsTypedBuffer    : 1;   ///< True if the source is a typed buffer instead of an image.
            uint32 dstIsTypedBuffer    : 1;   ///< True if the destination is a typed buffer instead of an image.
            uint32 notifyOnly          : 1;   ///< Indicates that a present occurred outside of PAL.  PAL must not
                                              ///  execute a present if this is true but may update internal
                                              ///  tracking state.
            uint32 reserved            : 28;  ///< Reserved for future use.
        };
        uint32 u32All;                        ///< Flags packed as 32-bit uint.
    } flags;                                  ///< Present flags.

    OsWindowHandle hWindow;          ///< Native OS window handle that this image should be presented to.
    PresentMode    presentMode;      ///< Chooses between windowed and fullscreen present.
    uint32         presentInterval;  ///< Must be an integer from 0 to 4.  0 indicates that the present should
                                     ///  occur immediately (may tear), and 1-4 indicates the present should
                                     ///  occur after 1 to 4 vertical syncs.  Only valid for fullscreen presents.
    union
    {
        IImage*     pSrcImage;        ///< Optional: The image to be presented.  If null, the present will not
                                      ///  occur but PAL may still call into the OS on certain platforms that
                                      ///  expect it.
        IGpuMemory* pSrcTypedBuffer;  ///< The typed buffer to be presented.  If null, the present will not occur
                                      ///  but PAL may still call into the OS on certain platforms that expect it.
    };
    union
    {
        IImage*     pDstImage;        ///< Optional: copy from the source image to this image.  If null, PAL will
                                      ///  automatically copy into the appropriate platform-specific destination.
                                      ///  This is only supported for windowed mode presents.
        IGpuMemory* pDstTypedBuffer;  ///< The typed buffer to be presented.  If null, the present will not occur
                                      ///  but PAL may still call into the OS on certain platforms that expect it.
                                      ///  NOTE(review): this wording duplicates pSrcTypedBuffer; given the
                                      ///  dstIsTypedBuffer flag this member is presumably the destination typed
                                      ///  buffer - confirm the intended null behavior.
    };
};

/// Media stream counter information.
struct MscInfo
{
    uint64 targetMsc;  ///< If the current MSC is less than targetMsc, the buffer swap will occur when the MSC
                       ///  value becomes equal to targetMsc.
    uint64 divisor;    ///< Divisor.  If the current MSC is greater than or equal to targetMsc, the buffer swap
                       ///  will occur the next time the MSC value is incremented to a value such that
                       ///  MSC % divisor = remainder.
    uint64 remainder;  ///< Remainder.  Used together with divisor as described above.
};

/// Specifies properties for the presentation of an image to the screen.  Input structure to IQueue::PresentSwapChain().
struct PresentSwapChainInfo
{
    PresentMode presentMode;     ///< Chooses between windowed and fullscreen present.
    IImage*     pSrcImage;       ///< The image to be presented.
    ISwapChain* pSwapChain;      ///< The swap chain associated with the source image.
    uint32      imageIndex;      ///< The index of the source image within the swap chain.  Ownership of this image
                                 ///  index will be released back to the swap chain if this call succeeds.
    uint32      rectangleCount;  ///< Number of valid rectangles in the pRectangles array.
    uint32      syncInterval;    ///< Applicable only when syncIntervalOverride is set
                                 ///  0 - The presentation occurs immediately, there is no synchronization.
                                 ///  1 through 4 - Synchronize presentation after the nth vertical blank.
    const Rect* pRectangles;     ///< Array of rectangles defining the regions which will be updated.
    uint64      presentId;       ///< PresentId functions as an identifier for present operations on a swapchain.
                                 ///  If this PresentId is non-zero, then the application can later use this value
                                 ///  to refer to that image presentation.  A value of zero indicates that this
                                 ///  presentation has no associated presentId.  A non-zero presentId must be greater
                                 ///  than any non-zero presentId passed previously by the application for the same
                                 ///  swapchain.
    union
    {
        struct
        {
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 941
            uint32 notifyOnly           : 1;   ///< True if it is a notify-only present
#else
            uint32 notifyOnly           : 1;   ///< Indicates that a present occurred outside of PAL.  PAL must not
                                               ///  execute a present if this is true but may update internal
                                               ///  tracking state.
#endif
            uint32 isTemporaryMono      : 1;   ///< True if WS Stereo is enabled, but 3D display mode turned off.
            uint32 turboSyncEnabled     : 1;   ///< Whether TurboSync is enabled.
            uint32 syncIntervalOverride : 1;   ///< Override default syncInterval with the value in syncInterval
                                               ///  Supported only on Windows wsiPlatforms.
            uint32 reserved             : 28;  ///< Reserved for future use.
        };
        uint32 u32All;                         ///< Flags packed as 32-bit uint.
    } flags;                                   ///< PresentSwapChainInfo flags.
#if PAL_AMDGPU_BUILD
    MscInfo mscInfo;                           ///< Media stream counter information
#endif
};

/// Specifies a mapping from a range of pages in a virtual GPU memory object to a range of pages in a real GPU memory
/// object.  Input to IQueue::RemapVirtualMemoryPages().
///
/// When mapping pages of a virtual GPU memory object to a range of pages in a real GPU memory object on a remote GPU,
/// the client must point pRealGpuMem at a peer GPU memory object created on the input queue's device instead of the
/// actual real GPU memory object created on the remote device.  This is required for two reasons:
/// 1. PAL can only view remote GPU memory using peer objects.
/// 2. PAL enforces a separation of state between different IDevice object families.
///
/// virtualStartOffset and size must be aligned to the virtualMemPageSize member of @ref DeviceProperties.
/// realStartOffset must be aligned to the realMemAllocGranularity member of @ref DeviceProperties.
struct VirtualMemoryRemapRange
{
    IGpuMemory* pVirtualGpuMem;      ///< Virtual GPU memory object whose mapping is being updated.
    gpusize     virtualStartOffset;  ///< Start of the page range to be updated, in bytes.
    IGpuMemory* pRealGpuMem;         ///< Real GPU memory object the virtual range should point at.
    gpusize     realStartOffset;     ///< Start of the page range in the real GPU memory object, in bytes.
    gpusize     size;                ///< Size of the mapping range, in bytes.

    VirtualGpuMemAccessMode virtualAccessMode;  ///< Access mode for virtual GPU memory's unmapped pages.
                                                ///  This parameter is ignored on some platforms.
};

/// Specifies a set of page mappings to copy between virtual GPU memory objects.  The source and destination can be the
/// same memory object and the source and destination regions may overlap.  Input to
/// IQueue::CopyVirtualMemoryPageMappings().
///
/// srcStartOffset, dstStartOffset, and size must be aligned to the virtualMemPageSize member of @ref DeviceProperties.
struct VirtualMemoryCopyPageMappingsRange
{
    IGpuMemory* pSrcGpuMem;      ///< Virtual GPU memory object whose mapping is being copied from.
    gpusize     srcStartOffset;  ///< Start of the copy source range, in bytes.
    IGpuMemory* pDstGpuMem;      ///< Virtual GPU memory object whose mapping is being copied to.
    gpusize     dstStartOffset;  ///< Start of the copy destination range, in bytes.
    gpusize     size;            ///< Size of the mapping range, in bytes.
};

/// Specifies kernel level information about a context.
struct KernelContextInfo
{
    union
    {
        struct
        {
            uint32 hasDebugVmid        : 1;   ///< True if the context has acquired the debug vmid.
            uint32 hasHighPriorityVmid : 1;   ///< True if the context has acquired the high priority vmid.
            uint32 reserved            : 30;  ///< Reserved for future use.
        };
        uint32 u32All;                        ///< Flags packed as 32-bit uint.
    } flags;                                  ///< Context flags.

    uint64 contextIdentifier;                 ///< Kernel scheduler context identifier.
};

/**
 ***********************************************************************************************************************
 * @interface IQueue
 * @brief     Represents a queue of work for a particular GPU engine on a device.
 *
 * An IQueue object is a virtual representation of a hardware engine on the device.
Multiple IQueue objects can be * created and have work submitted on them in parallel. Work is submitted to a queue through @ref ICmdBuffer objects, * and work can be synchronized between multiple queues using @ref IQueueSemaphore objects. * * @see IDevice::GetQueue() *********************************************************************************************************************** */ class IQueue : public IDestroyable { public: /// Submits a group of root command buffers for execution on this queue. /// /// @param [in] submitInfo Specifies all command buffers to execute along with other residency and synchronization /// information. See @ref SubmitInfo for additional, important documentation. /// /// @returns Success if the command buffer was successfully submitted. Otherwise, one of the following errors may /// be returned: /// + ErrorInvalidPointer if: /// - any of the array inputs are null when their counts are non-zero. /// - any members of non-null point arrays are null. /// + ErrorTooManyMemoryReferences if the total number of memory references (device/queue global and /// per-command buffer) is too large. /// + ErrorInvalidValue if blockIfFlippingCount is too large. /// + ErrorIncompleteCommandBuffer if any of the submitted command buffers are not properly constructed. /// + ErrorIncompatibleQueue if any submitted command buffer does not match this queue's type (e.g., /// universal, graphics, DMA). virtual Result Submit( const MultiSubmitInfo& submitInfo) = 0; /// Waits for all previous submission on this queue to complete before control is returned to the caller. /// /// @returns Success if wait for submissions completed. Otherwise an error indicates reason for unsuccessful wait, /// for example due to lost device. virtual Result WaitIdle() = 0; /// Inserts a semaphore signal into the GPU queue. The semaphore will be signaled once all previously submitted /// work on this queue has completed. /// /// @param [in] pQueueSemaphore Semaphore to signal. 
/// @param [in] value timeline Semaphore point value to signal, ignored for non-timeline semaphores. /// /// @returns Success if the semaphore signal was successfully queued. Otherwise, one of the following errors may be /// returned: /// + ErrorUnknown if the OS scheduler rejects the signal for unknown reasons. virtual Result SignalQueueSemaphore( IQueueSemaphore* pQueueSemaphore, uint64 value = 0) = 0; /// Inserts a semaphore wait into the GPU queue. The queue will be stalled until the specified semaphore is /// signaled. /// /// @param [in] pQueueSemaphore Semaphore to wait on. /// @param [in] value timeline semaphore point value to wait on, ignored for non-timeline semaphores. /// /// @returns Success if the semaphore wait was successfully queued. Otherwise, one of the following errors may be /// returned: /// + ErrorUnknown if the OS scheduler rejects the wait for unknown reasons. virtual Result WaitQueueSemaphore( IQueueSemaphore* pQueueSemaphore, uint64 value = 0) = 0; #if PAL_KMT_BUILD /// Acquire the keyed mutex of shared GPU memory object (CPU sync) and then wait for the synchronization object of /// the shared GPU memory object (GPU sync based on fence). Note that the shared GPU memory object has to be /// a D3d11 resource created with (D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX | D3D11_RESOURCE_MISC_SHARED_NTHANDLE) /// misc flag. /// /// @param [in] pGpuMemory Shared GPU memory object on which keyed mutex and synchronization object are bound. /// [in] key Key of keyed mutex to be acquired. /// [in] timeout Timeout interval for keyed mutex acquiring, in milliseconds. /// /// @ returns Success if the keyed mutex has been successfully acquired and wait for the synchronization object /// has been successfully scheduled. Otherwise, one of the following errors may be returned: /// + ErrorUnknown if either the keyed mutex has not been successfully acuiqred or wait for the /// synchronization object has not been successfully scheduled. 
virtual Result KeyedMutexAcquireSync( IGpuMemory* pGpuMemory, uint64 key, std::chrono::milliseconds timeout) = 0; /// Signal the synchronization object of shared GPU memory object with bumped fence value and then release the /// keyed mutex of shared GPU memory object. Note that the shared GPU memory object has to be a D3d11 resource /// created with (D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX | D3D11_RESOURCE_MISC_SHARED_NTHANDLE) misc flag. /// /// @param [in] pGpuMemory Shared GPU memory object on which keyed mutex and synchronization object are /// bound. /// [in] key Key of keyed mutex to be released. /// /// @ returns Success if signal of the synchronization object has been successfully scheduled and the keyed mutex /// has been successfully released. Otherwise, one of the following errors may be returned: /// + ErrorUnknown if either signal of the synchronization object has not been successfully scheduled /// or keyed mutex has not been successfully released. virtual Result KeyedMutexReleaseSync( IGpuMemory* pGpuMemory, uint64 key) = 0; #endif /// This function passes application information to KMD for application specific power optimizations. /// Power configuration are restored to default when all application queues are destroyed. /// /// @param [in] pFileName Application executable name /// @param [in] pPathName Path to the application /// /// @returns Success if the information is passed successfully. Otherwise, one of the following errors may be /// returned: /// + Unsupported if this function is not available on this OS or if the queue context is null. /// + ErrorUnknown if an unexpected internal error occurs. virtual Result UpdateAppPowerProfile( const wchar_t* pFileName, const wchar_t* pPathName) = 0; /// Queues the specified image for presentation on the screen. This function directly queues the presentation /// request based on the input parameters without special synchronization considerations like a swap chain present. 
/// All previous work done on this queue will complete before the image is displayed. /// /// This function should never be called with a swap chain presentable image because it won't release ownership of /// the presentable image index, eventually deadlocking the swap chain. /// /// Overall support for direct presents can be queried at platform creation time via supportNonSwapChainPresents /// in @ref PlatformProperties. Support for particular present modes is specifed via supportedDirectPresentModes /// in @ref DeviceProperties. /// /// @note Any images specified in presentInfo must be made resident before calling this function. /// /// @param [in] presentInfo Specifies the source image and destination window for the present as well as other /// properties. /// /// @returns Success if the present was successfully queued. Otherwise, one of the following errors may be /// returned: /// + ErrorInvalidValue if the flip interval is invalid. /// + ErrorInvalidValue if the present mode doesn't match the capabilities of the image. /// + ErrorInvalidFlags if the present flags don't match the capabilities of the image. virtual Result PresentDirect( const PresentDirectInfo& presentInfo) = 0; /// Queues the specified image for presentation on the screen. This function uses the provided swap chain to /// determine exactly how the image should be presented (e.g., can the user see tearing). See @ref ISwapChain for /// more information on swap chain presentation. All previous work done on this queue will complete before the /// image is displayed, but future work may execute before the present is completed because swap chain present /// execution may be asynchronous to the queue that initiated present. /// /// Assuming the presentInfo is valid, this function will always release ownership of the presentable image index /// even if PAL encounters an error while executing the present. 
/// /// Queue support for swap chain presents is specified via supportsSwapChainPresents in @ref DeviceProperties. /// Support for particular PresentModes is queried per SwapChainMode via IDevice::GetSwapChainInfo(). /// /// @note The source image specified in presentInfo must be made resident before calling this function. /// /// @param [in] presentInfo Specifies the source image, swap chain, and basic presentation information. /// /// @returns Success if the present was successfully queued. Otherwise, one of the following errors may be /// returned: /// + ErrorInvalidPointer if the source image or swap chain are null. /// + ErrorInvalidValue if the present mode doesn't match the capabilities of the image or if the image /// index isn't valid within the swap chain. virtual Result PresentSwapChain( const PresentSwapChainInfo& presentInfo) = 0; /// Inserts a delay of a specified amount of time before processing more commands on this queue. /// /// Only available on timer queues. Useful in conjunction with queue semaphores to implement frame pacing. /// /// @param [in] delay Time, in milliseconds, to delay before processing more commands on this queue. /// /// @returns Success if the delay was successfully queued. Otherwise, one of the following errors may be returned: /// + ErrorInvalidValue if delay is less than 0. virtual Result Delay( Util::fmilliseconds delay) = 0; /// Inserts a delay of a specified amount of time on this queue after a vsync on a private display object. /// /// Only available on timer queues. Useful in conjunction with queue semaphores to implement pacing of GPU and CPU /// operations for rendering and presentation in VR as this allows GPU commands of next frame to be sent early but /// blocks GPU execution until after vsync. /// /// @param [in] delay Time, in microseconds, to delay before processing more commands on this queue. /// @param [in] pScreen The private screen object that the vsync is occurring and the delay is waiting on. 
/// /// @returns Success if the delay was successfully queued. Otherwise, one of the following errors may be returned: /// + ErrorInvalidValue if delay is less than 0. virtual Result DelayAfterVsync( Util::fmicroseconds delay, const IPrivateScreen* pScreen) = 0; /// Updates page mappings for virtual GPU memory allocations. /// /// @param [in] rangeCount Number of ranges to remap (i.e., size of the pRanges array). /// @param [in] pRanges Defines the set of remappings from virtual GPU memory object pages to real GPU /// memory object pages. /// @param [in] doNotWait If true, then this paging operation will be executed on the Queue immediately, without /// waiting for any previous rendering to finish first. On platforms that don't support /// this, the flag will be ignored. /// @param [in] pFence Optional. Pointer to an IFence, which will be signaled after the VA remapping. /// /// @returns Success if the remappings were executed successfully. It is assumed that the following conditions are /// met for the input to this function: /// + rangeCount is not 0. /// + The page range for all members of pRanges are valid. /// + pRanges is not null. /// + pVirtualGpuMem is not null for any member of pRanges. /// + pRanges does not specify a real GPU memory object as a virtual GPU memory object or vice versa. virtual Result RemapVirtualMemoryPages( uint32 rangeCount, const VirtualMemoryRemapRange* pRanges, bool doNotWait, IFence* pFence) = 0; /// Copies page mappings from one virtual GPU memory object to another. /// /// @param [in] rangeCount Number of ranges to copy (i.e., size of the pRanges array). /// @param [in] pRanges Defines the set of page mappings to copy between virtual GPU memory objects. /// @param [in] doNotWait If true, then this paging operation will be executed on the Queue immediately, without /// waiting for any previous rendering to finish first. On platforms that don't support /// this, the flag will be ignored. 
/// /// @returns Success if the mappings were copied successfully. It is assumed that the following conditions are /// met for the input to this function: /// + rangeCount is not 0. /// + The page range for all members of pRanges are valid. /// + pRanges is not null. /// + pSrcGpuMem or pDstGpuMem is not null for any member of pRanges. /// + pRanges does not specify a real GPU memory object as source or destination virtual Result CopyVirtualMemoryPageMappings( uint32 rangeCount, const VirtualMemoryCopyPageMappingsRange* pRanges, bool doNotWait) = 0; /// Associates the provided Fence object with the last submission on this queue object. The Fence can be used via /// GetStatus() to get the status of the last Submit, however no event will be created/set for the Fence so /// WaitForFences() should NOT be called on the fence after this association. /// /// @see IFence::GetStatus() /// @see IFence::WaitForFences() /// /// @param [in] pFence Fence object to be associated with the last Submit on this queue /// /// @returns Success if the association was successful. ErrorUnavailable will be returned in there has not yet been /// a Submit on this queue. virtual Result AssociateFenceWithLastSubmit( IFence* pFence) = 0; /// Set execution priority for the current queue, it allows to elevate execution priority of submitted command /// buffers, but it has no effect on command buffers that have already been submitted for execution. Elevating /// the queue priority to medium or high would allow to temporary stall a low priority queue execution and execute /// its work as soon as the low priority queue starts draining. /// /// @param [in] priority The priority level of the queue. virtual void SetExecutionPriority( QueuePriority priority) = 0; /// Returns a list of GPU memory allocations used by this queue. /// /// @param [in,out] pNumEntries Input value specifies the available size in pAllocInfoList; output value /// reports the number of GPU memory allocations. 
/// @param [out] pAllocInfoList If pAllocInfoList=nullptr, then pNumEntries is ignored on input. On output it /// will reflect the number of allocations that make up this queue. If /// pAllocInfoList!=nullptr, then on input pNumEntries is assumed to be the number /// of entries in the pAllocInfoList array. On output, pNumEntries reflects the /// number of entries in pAllocInfoList that are valid. /// @returns Success if the allocation info was successfully written to the buffer. /// + ErrorInvalidValue if the caller provides a buffer size that is different from the size needed. /// + ErrorInvalidPointer if pNumEntries is nullptr. virtual Result QueryAllocationInfo( size_t* pNumEntries, GpuMemSubAllocInfo* const pAllocInfoList) = 0; /// Returns the QueueType for the queue virtual QueueType Type() const = 0; /// Returns the EngineType for the queue virtual EngineType GetEngineType() const = 0; /// Queries the kernel context info associated with this queue and copies it into pKernelContextInfo. /// /// Only supported on Windows platforms. /// /// @param [out] pKernelContextInfo Pointer to a KernelContextInfo struct to copy the information into. /// @returns Success if the information is successfully copied into the output struct. /// + ErrorInvalidPointer if pKernelContextInfo is nullptr. /// + ErrorUnavailable if kernel context information is not available on the current platform. virtual Result QueryKernelContextInfo(KernelContextInfo* pKernelContextInfo) const = 0; /// Returns the value of the associated arbitrary client data pointer. /// Can be used to associate arbitrary data with a particular PAL object. /// /// @returns Pointer to client data. void* GetClientData() const { return m_pClientData; } /// Sets the value of the associated arbitrary client data pointer. /// Can be used to associate arbitrary data with a particular PAL object. /// /// @param [in] pClientData A pointer to arbitrary client data. 
void SetClientData(
        void* pClientData)
    {
        m_pClientData = pClientData;
    }

protected:
    /// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
    /// calling the proper create method. Initializes the client data pointer to nullptr.
    IQueue() : m_pClientData(nullptr) {}

    /// @internal Destructor. Prevent use of delete operator on this interface. Queues will be destroyed when the
    /// associated device is destroyed.
    virtual ~IQueue() { }

private:
    /// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
    /// and set via SetClientData().
    /// For non-top-layer objects, this will point to the layer above the current object.
    void* m_pClientData;
};

} // Pal