Update amdgpu-windows-interop with latest changes (#1718)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
systems-assistant[bot]
2025-11-05 21:13:32 +01:00
committed by GitHub
vanhempi 280cda3196
commit 321e497048
136 muutettua tiedostoa jossa 44376 lisäystä ja 44160 poistoa
File diff suppressed because it is too large Load Diff
@@ -1,204 +1,204 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palCmdAllocator.h
* @brief Defines the Platform Abstraction Library (PAL) ICmdAllocator interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
// Forward declarations.
struct GpuMemSubAllocInfo;
class IGpuMemory;
/// Flags controlling the creation of ICmdAllocator objects. The named bitfields (32 bits in total) and u32All share
/// the same storage, so the complete set of flags can be read or written at once through u32All.
union CmdAllocatorCreateFlags
{
struct
{
uint32 threadSafe : 1; ///< If set, the allocator will acquire a lock each time it is accessed;
/// otherwise it will not attempt to protect itself from multithreaded
/// access.
uint32 autoMemoryReuse : 1; ///< If set, the allocator will track when the GPU finishes accessing
/// each piece of command memory and attempt to reuse memory which the
/// GPU is done with before allocating more memory from the OS. If not
/// set, memory will only be recycled after a call to
/// @ref ICmdAllocator::Reset().
uint32 disableBusyChunkTracking : 1; ///< If set, the allocator will not do any GPU-side tracking of which
/// command chunks are still in use. It will be the client's (or the
/// application's) responsibility to guarantee that command chunks are
/// not returned to the allocator before the GPU has finished processing
/// them. Failure to guarantee this will result in undefined behavior.
/// This flag has no effect if @ref autoMemoryReuse is not set.
uint32 autoTrimMemory : 1; ///< If set, the allocator will automatically trim down the allocations
/// (where all chunks are idle on the freeList). A minimum of
/// allocFreeThreshold allocations (see CmdAllocatorCreateInfo::allocInfo)
/// are kept for fast reuse.
uint32 reserved : 28; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
};
/// Different types of allocation data that an ICmdAllocator allocates and distributes to command buffers.
enum CmdAllocType : uint32
{
CommandDataAlloc = 0, ///< Data allocated is for executable commands.
EmbeddedDataAlloc, ///< Data allocated is for embedded data.
LargeEmbeddedDataAlloc, ///< Data allocated is for embedded data, where the allocation is >32kb.
GpuScratchMemAlloc, ///< Data allocated is GPU-only accessible at command buffer execution-time. Possible
/// uses include GPU events.
CmdAllocatorTypeCount ///< Number of allocation types for ICmdAllocators. Also used to size per-type arrays
/// such as CmdAllocatorCreateInfo::allocInfo.
};
/// Specifies properties for creation of an ICmdAllocator object. Input structure to IDevice::CreateCmdAllocator().
/// One allocInfo entry must be filled in per CmdAllocType, indexed by the enum value.
struct CmdAllocatorCreateInfo
{
CmdAllocatorCreateFlags flags; ///< Flags controlling command allocator creation.
struct
{
GpuHeap allocHeap; ///< Preferred allocation heap. For @ref GpuScratchMemAlloc, this field is
/// ignored and the allocation will always be in GPU-invisible memory. For
/// all other allocation types, this must be CPU-mappable.
/// For best performance, command allocators that will be used by the
/// UVD engine should prefer the Local heap.
gpusize allocSize; ///< Size, in bytes, of the GPU memory allocations this allocator will create.
/// It must be an integer multiple of suballocSize.
gpusize suballocSize; ///< Size, in bytes, of the chunks of GPU memory this allocator will give to
/// command buffers. It must be an integer multiple of 4096.
/// Must be greater than zero even if the client doesn't plan on using this
/// allocation type.
uint32 allocFreeThreshold; ///< Minimum count of free allocations that the allocator should keep around
/// for fast reuse. It is used when the autoTrimMemory flag is set.
} allocInfo[CmdAllocatorTypeCount]; ///< Information for each allocation type.
};
/// Output structure for ICmdAllocator::QueryUtilizationInfo().
/// The CmdAllocator utilization data can be queried by PAL clients in order to decide whether to trim the allocations.
/// The counts describe a single CmdAllocType, the one passed to the query.
struct CmdAllocatorUtilizationInfo
{
uint32 numAllocations; ///< Number of allocations owned by the allocator.
uint32 numFreeChunks; ///< Number of chunks that are reset and not in use.
uint32 numBusyChunks; ///< Number of chunks that are in use by the GPU.
uint32 numReuseChunks; ///< Number of chunks that have been 'returned' to the allocator for reuse.
};
/**
***********************************************************************************************************************
* @interface ICmdAllocator
* @brief Allocates and distributes GPU memory to command buffers on the client's behalf.
*
* All ICmdBuffer objects must be associated with an ICmdAllocator at creation. Command buffers may switch command
* allocators when ICmdBuffer::Reset() is called. The set of command buffers associated with a given command allocator
* will query that allocator for additional GPU memory as they are building commands.
*
* To protect against race conditions the client must ask for a thread safe command allocator unless it can guarantee
* that all command buffers associated with a given command allocator will be built, reset, and destroyed in a thread-
* safe manner. It is illegal to destroy a command allocator while it still has command buffers associated with it.
*
* @see IDevice::CreateCmdAllocator()
***********************************************************************************************************************
*/
class ICmdAllocator : public IDestroyable
{
public:
/// Explicitly resets a command allocator, marking all internal GPU memory allocations as unused.
///
/// The client is responsible for guaranteeing that all command buffers associated with this allocator have finished
/// GPU execution and have been explicitly reset before calling this function.
///
/// @param [in] freeMemory Whether all GPU and CPU memory allocations should be returned to the OS.
///
/// @returns Success if the command allocator was successfully reset. Otherwise, one of the following errors may be
/// returned:
/// + ErrorUnknown if an internal PAL error occurs.
virtual Result Reset(bool freeMemory) = 0;
/// Explicitly trims a command allocator, deleting as many unused internal GPU memory allocations as possible.
///
/// @returns Success if the command allocator was successfully trimmed.
///
/// @param [in] allocTypeMask Bitmask controlling whether trimming will be applied for each CmdAllocType;
/// bit (1 << type) selects that type.
/// Use (1 << CmdAllocatorTypeCount) - 1 to apply trimming to all types.
/// When trimming only the embedded data use (1 << EmbeddedDataAlloc).
/// @param [in] dynamicThreshold Minimum count of free allocations that the allocator should keep around.
virtual Result Trim(uint32 allocTypeMask, uint32 dynamicThreshold) = 0;
/// Query the numbers of allocations and chunks of the given CmdAllocator type.
/// This may help clients to decide whether they may apply trimming or not.
///
/// @returns Success if valid values can be reported.
///
/// @param [in] type CmdAllocType that is being queried.
/// @param [out] pUtilizationInfo The allocation and chunk counts will be stored here.
virtual Result QueryUtilizationInfo(CmdAllocType type, CmdAllocatorUtilizationInfo* pUtilizationInfo) const = 0;
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data; nullptr until SetClientData() has been called, because the constructor
/// initializes it to nullptr.
void* GetClientData() const
{
return m_pClientData;
}
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
/// The pointer is stored as-is; this class never dereferences it.
///
/// @param [in] pClientData A pointer to arbitrary client data.
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method.
ICmdAllocator() : m_pClientData(nullptr) {}
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~ICmdAllocator() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palCmdAllocator.h
* @brief Defines the Platform Abstraction Library (PAL) ICmdAllocator interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
// Forward declarations.
struct GpuMemSubAllocInfo;
class IGpuMemory;
/// Flags controlling the creation of ICmdAllocator objects. The named bitfields (32 bits in total) and u32All share
/// the same storage, so the complete set of flags can be read or written at once through u32All.
union CmdAllocatorCreateFlags
{
struct
{
uint32 threadSafe : 1; ///< If set, the allocator will acquire a lock each time it is accessed;
/// otherwise it will not attempt to protect itself from multithreaded
/// access.
uint32 autoMemoryReuse : 1; ///< If set, the allocator will track when the GPU finishes accessing
/// each piece of command memory and attempt to reuse memory which the
/// GPU is done with before allocating more memory from the OS. If not
/// set, memory will only be recycled after a call to
/// @ref ICmdAllocator::Reset().
uint32 disableBusyChunkTracking : 1; ///< If set, the allocator will not do any GPU-side tracking of which
/// command chunks are still in use. It will be the client's (or the
/// application's) responsibility to guarantee that command chunks are
/// not returned to the allocator before the GPU has finished processing
/// them. Failure to guarantee this will result in undefined behavior.
/// This flag has no effect if @ref autoMemoryReuse is not set.
uint32 autoTrimMemory : 1; ///< If set, the allocator will automatically trim down the allocations
/// (where all chunks are idle on the freeList). A minimum of
/// allocFreeThreshold allocations (see CmdAllocatorCreateInfo::allocInfo)
/// are kept for fast reuse.
uint32 reserved : 28; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
};
/// Different types of allocation data that an ICmdAllocator allocates and distributes to command buffers.
enum CmdAllocType : uint32
{
CommandDataAlloc = 0, ///< Data allocated is for executable commands.
EmbeddedDataAlloc, ///< Data allocated is for embedded data.
LargeEmbeddedDataAlloc, ///< Data allocated is for embedded data, where the allocation is >32kb.
GpuScratchMemAlloc, ///< Data allocated is GPU-only accessible at command buffer execution-time. Possible
/// uses include GPU events.
CmdAllocatorTypeCount ///< Number of allocation types for ICmdAllocators. Also used to size per-type arrays
/// such as CmdAllocatorCreateInfo::allocInfo.
};
/// Specifies properties for creation of an ICmdAllocator object. Input structure to IDevice::CreateCmdAllocator().
/// One allocInfo entry must be filled in per CmdAllocType, indexed by the enum value.
struct CmdAllocatorCreateInfo
{
CmdAllocatorCreateFlags flags; ///< Flags controlling command allocator creation.
struct
{
GpuHeap allocHeap; ///< Preferred allocation heap. For @ref GpuScratchMemAlloc, this field is
/// ignored and the allocation will always be in GPU-invisible memory. For
/// all other allocation types, this must be CPU-mappable.
/// For best performance, command allocators that will be used by the
/// UVD engine should prefer the Local heap.
gpusize allocSize; ///< Size, in bytes, of the GPU memory allocations this allocator will create.
/// It must be an integer multiple of suballocSize.
gpusize suballocSize; ///< Size, in bytes, of the chunks of GPU memory this allocator will give to
/// command buffers. It must be an integer multiple of 4096.
/// Must be greater than zero even if the client doesn't plan on using this
/// allocation type.
uint32 allocFreeThreshold; ///< Minimum count of free allocations that the allocator should keep around
/// for fast reuse. It is used when the autoTrimMemory flag is set.
} allocInfo[CmdAllocatorTypeCount]; ///< Information for each allocation type.
};
/// Output structure for ICmdAllocator::QueryUtilizationInfo().
/// The CmdAllocator utilization data can be queried by PAL clients in order to decide whether to trim the allocations.
/// The counts describe a single CmdAllocType, the one passed to the query.
struct CmdAllocatorUtilizationInfo
{
uint32 numAllocations; ///< Number of allocations owned by the allocator.
uint32 numFreeChunks; ///< Number of chunks that are reset and not in use.
uint32 numBusyChunks; ///< Number of chunks that are in use by the GPU.
uint32 numReuseChunks; ///< Number of chunks that have been 'returned' to the allocator for reuse.
};
/**
***********************************************************************************************************************
* @interface ICmdAllocator
* @brief Allocates and distributes GPU memory to command buffers on the client's behalf.
*
* All ICmdBuffer objects must be associated with an ICmdAllocator at creation. Command buffers may switch command
* allocators when ICmdBuffer::Reset() is called. The set of command buffers associated with a given command allocator
* will query that allocator for additional GPU memory as they are building commands.
*
* To protect against race conditions the client must ask for a thread safe command allocator unless it can guarantee
* that all command buffers associated with a given command allocator will be built, reset, and destroyed in a thread-
* safe manner. It is illegal to destroy a command allocator while it still has command buffers associated with it.
*
* @see IDevice::CreateCmdAllocator()
***********************************************************************************************************************
*/
class ICmdAllocator : public IDestroyable
{
public:
/// Explicitly resets a command allocator, marking all internal GPU memory allocations as unused.
///
/// The client is responsible for guaranteeing that all command buffers associated with this allocator have finished
/// GPU execution and have been explicitly reset before calling this function.
///
/// @param [in] freeMemory Whether all GPU and CPU memory allocations should be returned to the OS.
///
/// @returns Success if the command allocator was successfully reset. Otherwise, one of the following errors may be
/// returned:
/// + ErrorUnknown if an internal PAL error occurs.
virtual Result Reset(bool freeMemory) = 0;
/// Explicitly trims a command allocator, deleting as many unused internal GPU memory allocations as possible.
///
/// @returns Success if the command allocator was successfully trimmed.
///
/// @param [in] allocTypeMask Bitmask controlling whether trimming will be applied for each CmdAllocType;
/// bit (1 << type) selects that type.
/// Use (1 << CmdAllocatorTypeCount) - 1 to apply trimming to all types.
/// When trimming only the embedded data use (1 << EmbeddedDataAlloc).
/// @param [in] dynamicThreshold Minimum count of free allocations that the allocator should keep around.
virtual Result Trim(uint32 allocTypeMask, uint32 dynamicThreshold) = 0;
/// Query the numbers of allocations and chunks of the given CmdAllocator type.
/// This may help clients to decide whether they may apply trimming or not.
///
/// @returns Success if valid values can be reported.
///
/// @param [in] type CmdAllocType that is being queried.
/// @param [out] pUtilizationInfo The allocation and chunk counts will be stored here.
virtual Result QueryUtilizationInfo(CmdAllocType type, CmdAllocatorUtilizationInfo* pUtilizationInfo) const = 0;
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data; nullptr until SetClientData() has been called, because the constructor
/// initializes it to nullptr.
void* GetClientData() const
{
return m_pClientData;
}
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
/// The pointer is stored as-is; this class never dereferences it.
///
/// @param [in] pClientData A pointer to arbitrary client data.
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method.
ICmdAllocator() : m_pClientData(nullptr) {}
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~ICmdAllocator() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};
} // Pal
File diff suppressed because it is too large Load Diff
@@ -1,370 +1,370 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palCmdTracking.h
* @brief Defines a number of support classes used for construction and storage of struct TrackedCmdLocation
* defined in trackedCmdLocation.h
*
* - struct TrackingEventInfo: A single mapping from a uint8 event id to a name, used for logging
* - class TrackedCmdSupportBase A set of TrackingEventInfo, maintained outside of Pal
* - class TrackedCmdLocationArray The arrays for TrackedCmdLocation's used for reporting
* correlation data through ICmdBufferReporting::CorrelationReportOnSubmit
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palVector.h"
#include "trackedCmdLocation.h"
namespace Pal
{
// forward decl
class Platform;
namespace CmdDisassembly
{
// forward definition
class TrackedCmdLocationArray;
/**
************************************************************************************************************************
* @brief class TrackedCmdLocationRef
* A copyable reference to a member in a TrackedCmdLocationArray, invariant to that array being
* re-allocated.
*
* @detail Is simply a pointer to a TrackedCmdLocationArray, and an index into that array
*
************************************************************************************************************************
*/
class TrackedCmdLocationRef
{
public:
TrackedCmdLocationRef()
: m_pSourceArray(nullptr),
m_index(0)
{
}
TrackedCmdLocationRef(
TrackedCmdLocationArray* pSourceArray,
Util::uint32 index)
: m_pSourceArray(pSourceArray),
m_index(index)
{
}
TrackedCmdLocationRef(
TrackedCmdLocationRef&& other) = default;
TrackedCmdLocationRef(
TrackedCmdLocationRef const& other) = default;
TrackedCmdLocationRef& operator=(
TrackedCmdLocationRef&& other) = default;
TrackedCmdLocationRef& operator=(
TrackedCmdLocationRef const& other) = default;
bool operator==(
TrackedCmdLocationRef const& other) const
{ return (this->m_pSourceArray == other.m_pSourceArray) && (this->m_index == other.m_index); }
bool operator!=(
TrackedCmdLocationRef const& other) const
{ return (this->m_pSourceArray != other.m_pSourceArray) || (this->m_index != other.m_index); }
TrackedCmdLocation* Use();
const TrackedCmdLocation* Get() const;
Util::uint32 GetIndex() const
{
return m_index;
}
/// Helper functions
///
/// Clears the TrackedCmdLocation referred to by this TrackedCmdLocationRef
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result Clear();
/// @returns
/// TrackedCmdLocationMode::Invalid if (IsValid() == false)
/// Get()->m_mode otherwise
TrackedCmdLocationMode GetMode() const;
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::Before
///
/// @param [in] eventId Refers to an uint8 event that has a begin and/or an end associated with it
/// Most likely, a value registered to a TrackedCmdSupportBase
/// @param [in] beforePtr The end pointer for the cmdList being tracked before the event referred to by eventId
/// Only 48-bits of beforePtr are used
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsBefore(
uint8 eventId,
uint64 beforePtr);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::After
///
/// @param [in] eventId Refers to an uint8 event that has a begin and/or an end associated with it
/// Most likely, a value registered to a TrackedCmdSupportBase
/// @param [in] afterPtr The end pointer for the cmdList being tracked after the event referred to by eventId
/// Only 48-bits of afterPtr are used
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsAfter(
uint8 eventId,
uint64 afterPtr);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::Delta, with no begin or end (ie, no data can be written to
/// the cmdList being tracked "during" the event referred to be eventId
///
/// @param [in] eventId Refers to an uint8 event that does not have a begin and/or an end associated with it
/// Such as Pal::CmdDisassembly::TrackedCmdLocation::PostClientEvent
/// @param [in] ptr The end pointer for the cmdList being tracked after the event referred to by eventId
/// Only 48-bits of ptr are used
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsEmptyDelta(
uint8 eventId,
uint64 ptr);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::ClientId
///
/// @param [in] clientId A 61-bit bit value used by the client application to identify which cmdList is being
/// tracked
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsClientId(
uint64 clientId);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::ClientEventId
///
/// @param [in] clientEventId A 61-bit bit value used by the client application to identify
/// a client event relative to the current end position of the cmdList being tracked
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsClientEvent(
uint64 clientEventId);
/// @brief bool TrackedCmdLocation::TrySetAsDelta(uint64 afterPtr)
/// Will attempt to set this TrackedCmdLocation to type TrackedCmdLocationMode::Delta
///
/// @detail If GetMode() == TrackedCmdLocationMode::Before and afterPtr - m_correlateInternal.m_ptr is small
/// enough to be encoded in m_correlateInternal.m_deltaInDWords, the mode will be altered to
/// TrackedCmdLocationMode::Delta, with afterPtr - m_correlateInternal.m_ptr encoded in
/// m_correlateInternal.m_deltaInDWords.
/// If this attempt fails, the calling function should instead create a TrackedCmdLocationMode::After
/// TrackedCmdLocation
///
/// @param [in] afterPtr, the value a TrackedCmdLocationMode::After would have for m_correlateInternal.m_ptr
/// @return Result::Success if it was possible to set this TrackedCmdLocation to type
/// TrackedCmdLocationMode::Delta
/// Result::Unsupported if the conditions described above are not met.
Result TrySetAsDelta(
uint64 afterPtr);
private:
TrackedCmdLocationArray* m_pSourceArray;
Util::uint32 m_index;
Result SetMode(
TrackedCmdLocationMode mode);
};
/// @brief struct TrackingEventInfo
///        An event name paired with a flag recording whether that name has been set.
struct TrackingEventInfo
{
    Util::StringView<char> name;    ///< Event name; only meaningful once isValid is true.
    bool                   isValid; ///< True when name has been set, false otherwise.

    /// A new entry starts out invalid; name is left default-constructed.
    TrackingEventInfo() : isValid{false} { }
};
/**
************************************************************************************************************************
* @brief class TrackedCmdSupportBase translates eventId's to strings for internal correlation events
*
* @detail For use in Pal::Queue when dumping to text files. Corresponds to
* TrackedCmdLocation::m_correlateInternal.m_event for the cases where TrackedCmdLocation::m_mode
* is not TrackedCmdLocationMode::ClientEvent
*
* The implementation for this lives in whichever client of Pal is creating the internal correlation events.
*
************************************************************************************************************************
*/
class TrackedCmdSupportBase
{
public:
virtual ~TrackedCmdSupportBase() = default;
void SetEventIdName(
uint8 eventId,
const char* name)
{
PAL_ASSERT(static_cast<uint32>(eventId) < NumUInt8Values);
m_allEventsMap[eventId].name = name;
m_allEventsMap[eventId].isValid = true;
}
TrackingEventInfo const& GetEventInfo(
uint8 eventId) const
{
PAL_ASSERT(static_cast<uint32>(eventId) < NumUInt8Values);
return m_allEventsMap[eventId];
}
protected:
static constexpr uint32 NumUInt8Values = UINT8_MAX + 1;
TrackingEventInfo m_allEventsMap[NumUInt8Values];
TrackedCmdSupportBase() = default;
};
/**
************************************************************************************************************************
* @brief class TrackedCmdLocationArray is simply a TrackedCmdLocationVec together with a clientId
* and some helpers. TrackedCmdLocationArray lives on Pal::GfxCmdBuffer
*
* @detail Each Pal::GfxCmdBuffer has at most CmdDisassembly::MaxNumSubCmdBuffers TrackedCmdLocationArray's
* corresponding to Pal::GfxCmdBuffer::NumCmdStreams();
*
* The clientId used for TrackedCmdLocationArray::m_clientId, corresponds to the client Id used in
* TrackedCmdLocation::m_clientId.m_clientId
*
* For the moment, the underlying implementation used is
* Util::Vector<TrackedCmdLocation, DefaultCapacity, Pal::Platform>, but could be changed to use a Chunk
* scheme, especially as sizes of cmdLists can become very large.
* The only requirement for such a change is that TrackedCmdLocationRef continues to function as an accessor.
*
* Note that the functions in TrackedCmdLocationArray are not designed for thread-safety, as they are
* issued from command-list-building functions that are, in their turn, not thread safe. Adding mutex
* behavior here would potentially hide issues relating to thread-safety.
*
************************************************************************************************************************
*/
class TrackedCmdLocationArray
{
public:
    /// Capacity template argument passed to the underlying Util::Vector.
    static constexpr uint32 DefaultCapacity = 1024;
    /// Index that Reset() stores in the "last location" reference.
    static constexpr uint32 BadIndex        = UINT32_MAX;
    /// Client id that Reset() stores in this array.
    static constexpr uint64 InvalidClientId = UINT64_MAX;

    /// Storage type used for the tracked locations.
    using TrackedCmdLocationVec = Util::Vector<TrackedCmdLocation, DefaultCapacity, Pal::Platform>;

    /// @returns sizeof(TrackedCmdLocationArray).
    static uint32 GetTrackedCmdLocationArraySizeInBytes()
    {
        return sizeof(TrackedCmdLocationArray);
    }

    /// Creation entry point (the constructor is private); defined outside this header.
    ///
    /// NOTE(review): presumably constructs the object inside pMemory, which would then need to be at least
    /// GetTrackedCmdLocationArraySizeInBytes() bytes - confirm against the implementation.
    static TrackedCmdLocationArray* CreateTrackedCmdLocationArray(
        void*          pMemory,
        Pal::Platform* pPlatform);

    /// Empties the location vector, sets the client id to InvalidClientId and points the "last location" reference
    /// at (this, BadIndex).
    void Reset()
    {
        m_locations.Clear();
        m_clientId     = InvalidClientId;
        m_lastLocation = TrackedCmdLocationRef(this, BadIndex);
    }

    /// Counterpart of CreateTrackedCmdLocationArray() (the destructor is private); defined outside this header.
    void Destroy();

    /// @returns The client id currently stored in this array.
    uint64 GetClientId() const
    {
        return m_clientId;
    }

    /// Sets the client id for this array; defined outside this header.
    Result SetClientId(
        uint64 clientId);

    /// @returns The value of size() of the underlying location vector.
    Util::uint32 GetTotalSize() const
    {
        return m_locations.size();
    }

    /// @returns Read-only access to the underlying location vector.
    const TrackedCmdLocationVec& GetLocationsVec() const
    {
        return m_locations;
    }

    /// @returns Mutable access to the underlying location vector.
    TrackedCmdLocationVec& UseLocationsVec()
    {
        return m_locations;
    }

    /// Produces the next location and reports a reference to it through pResult; defined outside this header.
    Pal::Result MakeNext(
        TrackedCmdLocationRef* pResult);

    /// @returns A copy of the reference to the last location.
    const TrackedCmdLocationRef GetLast() const
    {
        return m_lastLocation;
    }

    /// @returns true if location compares equal to the stored "last location" reference.
    bool IsLast(
        TrackedCmdLocationRef const& location) const
    {
        return (m_lastLocation == location);
    }

private:
    TrackedCmdLocationVec m_locations;     // The tracked locations themselves.
    Pal::Platform*        m_pPlatform;     // Platform pointer supplied at construction.
    uint64                m_clientId;      // InvalidClientId after Reset().
    TrackedCmdLocationRef m_lastLocation;  // (this, BadIndex) after Reset().

    // Construction and destruction are private; see CreateTrackedCmdLocationArray() and Destroy().
    TrackedCmdLocationArray(
        Pal::Platform* pPlatform);
    ~TrackedCmdLocationArray() = default;
};
} // namespace CmdDisassembly
} // namespace Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palCmdTracking.h
* @brief Defines a number of support classes used for construction and storage of struct TrackedCmdLocation
* defined in trackedCmdLocation.h
*
* - struct TrackingEventInfo: A single mapping from a uint8 event id to a name, used for logging
* - class TrackedCmdSupportBase A set of TrackingEventInfo, maintained outside of Pal
* - class TrackedCmdLocationArray The arrays for TrackedCmdLocation's used for reporting
* correlation data through ICmdBufferReporting::CorrelationReportOnSubmit
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palVector.h"
#include "trackedCmdLocation.h"
namespace Pal
{
// forward decl
class Platform;
namespace CmdDisassembly
{
// forward definition
class TrackedCmdLocationArray;
/**
************************************************************************************************************************
* @brief class TrackedCmdLocationRef
* A copyable reference to a member in a TrackedCmdLocationArray, invariant to that array being
* re-allocated.
*
* @detail Is simply a pointer to a TrackedCmdLocationArray, and an index in to that array
*
************************************************************************************************************************
*/
class TrackedCmdLocationRef
{
public:
TrackedCmdLocationRef()
: m_pSourceArray(nullptr),
m_index(0)
{
}
TrackedCmdLocationRef(
TrackedCmdLocationArray* pSourceArray,
Util::uint32 index)
: m_pSourceArray(pSourceArray),
m_index(index)
{
}
TrackedCmdLocationRef(
TrackedCmdLocationRef&& other) = default;
TrackedCmdLocationRef(
TrackedCmdLocationRef const& other) = default;
TrackedCmdLocationRef& operator=(
TrackedCmdLocationRef&& other) = default;
TrackedCmdLocationRef& operator=(
TrackedCmdLocationRef const& other) = default;
bool operator==(
TrackedCmdLocationRef const& other) const
{ return (this->m_pSourceArray == other.m_pSourceArray) && (this->m_index == other.m_index); }
bool operator!=(
TrackedCmdLocationRef const& other) const
{ return (this->m_pSourceArray != other.m_pSourceArray) || (this->m_index != other.m_index); }
TrackedCmdLocation* Use();
const TrackedCmdLocation* Get() const;
Util::uint32 GetIndex() const
{
return m_index;
}
/// Helper functions
///
/// Clears the TrackedCmdLocation referred to by this TrackedCmdLocationRef
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result Clear();
/// @returns
/// TrackedCmdLocationMode::Invalid if (IsValid() == false)
/// Get()->m_mode otherwise
TrackedCmdLocationMode GetMode() const;
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::Before
///
/// @param [in] eventId Refers to an uint8 event that has a begin and/or an end associated with it
/// Most likely, a value registered to a TrackedCmdSupportBase
/// @param [in] beforePtr The end pointer for the cmdList being tracked before the event referred to by eventId
/// Only 48-bits of beforePtr are used
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsBefore(
uint8 eventId,
uint64 beforePtr);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::After
///
/// @param [in] eventId Refers to an uint8 event that has a begin and/or an end associated with it
/// Most likely, a value registered to a TrackedCmdSupportBase
/// @param [in] afterPtr The end pointer for the cmdList being tracked after the event referred to by eventId
/// Only 48-bits of afterPtr are used
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsAfter(
uint8 eventId,
uint64 afterPtr);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::Delta, with no begin or end (ie, no data can be written to
/// the cmdList being tracked "during" the event referred to be eventId
///
/// @param [in] eventId Refers to an uint8 event that does not have a begin and/or an end associated with it
/// Such as Pal::CmdDisassembly::TrackedCmdLocation::PostClientEvent
/// @param [in] ptr The end pointer for the cmdList being tracked after the event referred to by eventId
/// Only 48-bits of ptr are used
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsEmptyDelta(
uint8 eventId,
uint64 ptr);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::ClientId
///
/// @param [in] clientId A 61-bit bit value used by the client application to identify which cmdList is being
/// tracked
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsClientId(
uint64 clientId);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::ClientEventId
///
/// @param [in] clientEventId A 61-bit bit value used by the client application to identify
/// a client event relative to the current end position of the cmdList being tracked
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsClientEvent(
uint64 clientEventId);
/// @brief bool TrackedCmdLocation::TrySetAsDelta(uint64 afterPtr)
/// Will attempt to set this TrackedCmdLocation to type TrackedCmdLocationMode::Delta
///
/// @detail If GetMode() == TrackedCmdLocationMode::Before and afterPtr - m_correlateInternal.m_ptr is small
/// enough to be encoded in m_correlateInternal.m_deltaInDWords, the mode will be altered to
/// TrackedCmdLocationMode::Delta, with afterPtr - m_correlateInternal.m_ptr encoded in
/// m_correlateInternal.m_deltaInDWords.
/// If this attempt fails, the calling function should instead create a TrackedCmdLocationMode::After
/// TrackedCmdLocation
///
/// @param [in] afterPtr, the value a TrackedCmdLocationMode::After would have for m_correlateInternal.m_ptr
/// @return Result::Success if it was possible to set this TrackedCmdLocation to type
/// TrackedCmdLocationMode::Delta
/// Result::Unsupported if the conditions described above are not met.
Result TrySetAsDelta(
uint64 afterPtr);
private:
TrackedCmdLocationArray* m_pSourceArray;
Util::uint32 m_index;
Result SetMode(
TrackedCmdLocationMode mode);
};
/// @brief struct TrackingEventInfo
/// Essentially just a name, plus a boolean to indicate whether the name is valid / has been set
struct TrackingEventInfo
{
Util::StringView<char> name; ///< Event name; left to its own default construction until assigned.
bool isValid; ///< Whether name has been set; the constructor initializes this to false.
/// Creates an entry that is marked as not yet named.
TrackingEventInfo()
: isValid(false)
{}
};
/**
************************************************************************************************************************
* @brief class TrackedCmdSupportBase translates eventId's to strings for internal correlation events
*
* @detail For use in Pal::Queue when dumping to text files. Corresponds to
* TrackedCmdLocation::m_correlateInternal.m_event for the cases where TrackedCmdLocation::m_mode
* is not TrackedCmdLocationMode::ClientEvent
*
* The implementation for this is in whichever client of Pal is creating the internal correlation events.
*
************************************************************************************************************************
*/
class TrackedCmdSupportBase
{
public:
    virtual ~TrackedCmdSupportBase() = default;

    /// Associates a name with an event id and marks that table entry as valid.
    ///
    /// @param [in] eventId  Event id; used directly as the index into the event table.
    /// @param [in] name     Name to record for eventId. NOTE(review): the required lifetime of this string depends
    ///                      on Util::StringView, which is not visible here - confirm.
    void SetEventIdName(
        uint8       eventId,
        const char* name)
    {
        PAL_ASSERT(static_cast<uint32>(eventId) < NumUInt8Values);

        auto& entry   = m_allEventsMap[eventId];
        entry.name    = name;
        entry.isValid = true;
    }

    /// Looks up the table entry for an event id.
    ///
    /// @param [in] eventId  Event id; used directly as the index into the event table.
    ///
    /// @returns Reference to the entry for eventId; its isValid member tells whether a name was ever registered.
    TrackingEventInfo const& GetEventInfo(
        uint8 eventId) const
    {
        PAL_ASSERT(static_cast<uint32>(eventId) < NumUInt8Values);

        const auto& entry = m_allEventsMap[eventId];
        return entry;
    }

protected:
    /// Number of distinct values a uint8 can take; sizes the event table so every event id has a slot.
    static constexpr uint32 NumUInt8Values = UINT8_MAX + 1;

    /// One entry per possible uint8 event id.
    TrackingEventInfo m_allEventsMap[NumUInt8Values];

    /// Protected: this base is only constructible through a derived class.
    TrackedCmdSupportBase() = default;
};
/**
************************************************************************************************************************
* @brief class TrackedCmdLocationArray is simply a TrackedCmdLocationVec together with a clientId
* and some helpers. TrackedCmdLocationArray instances live on Pal::GfxCmdBuffer
*
* @detail Each Pal::GfxCmdBuffer has at most CmdDisassembly::MaxNumSubCmdBuffers TrackedCmdLocationArray's
* corresponding to Pal::GfxCmdBuffer::NumCmdStreams();
*
* The clientId used for TrackedCmdLocationArray::m_clientId, corresponds to the client Id used in
* TrackedCmdLocation::m_clientId.m_clientId
*
* For the moment, the underlying implementation used is
* Util::Vector<TrackedCmdLocation, DefaultCapacity, Pal::Platform>, but could be changed to use a Chunk
* scheme, especially as sizes of cmdLists can become very large.
* The only requirement for such a change is that TrackedCmdLocationRef continues to function as an accessor.
*
* Note that the functions in TrackedCmdLocationArray are not designed for thread-safety, as they are
* issued from command-list-building functions that are, in their turn, not thread safe. Adding mutex
* behavior here would potentially hide issues relating to thread-safety.
*
************************************************************************************************************************
*/
class TrackedCmdLocationArray
{
public:
    /// Capacity template argument passed to the underlying Util::Vector.
    static constexpr uint32 DefaultCapacity = 1024;
    /// Index that Reset() stores in the "last location" reference.
    static constexpr uint32 BadIndex        = UINT32_MAX;
    /// Client id that Reset() stores in this array.
    static constexpr uint64 InvalidClientId = UINT64_MAX;

    /// Storage type used for the tracked locations.
    using TrackedCmdLocationVec = Util::Vector<TrackedCmdLocation, DefaultCapacity, Pal::Platform>;

    /// @returns sizeof(TrackedCmdLocationArray).
    static uint32 GetTrackedCmdLocationArraySizeInBytes()
    {
        return sizeof(TrackedCmdLocationArray);
    }

    /// Creation entry point (the constructor is private); defined outside this header.
    ///
    /// NOTE(review): presumably constructs the object inside pMemory, which would then need to be at least
    /// GetTrackedCmdLocationArraySizeInBytes() bytes - confirm against the implementation.
    static TrackedCmdLocationArray* CreateTrackedCmdLocationArray(
        void*          pMemory,
        Pal::Platform* pPlatform);

    /// Empties the location vector, sets the client id to InvalidClientId and points the "last location" reference
    /// at (this, BadIndex).
    void Reset()
    {
        m_locations.Clear();
        m_clientId     = InvalidClientId;
        m_lastLocation = TrackedCmdLocationRef(this, BadIndex);
    }

    /// Counterpart of CreateTrackedCmdLocationArray() (the destructor is private); defined outside this header.
    void Destroy();

    /// @returns The client id currently stored in this array.
    uint64 GetClientId() const
    {
        return m_clientId;
    }

    /// Sets the client id for this array; defined outside this header.
    Result SetClientId(
        uint64 clientId);

    /// @returns The value of size() of the underlying location vector.
    Util::uint32 GetTotalSize() const
    {
        return m_locations.size();
    }

    /// @returns Read-only access to the underlying location vector.
    const TrackedCmdLocationVec& GetLocationsVec() const
    {
        return m_locations;
    }

    /// @returns Mutable access to the underlying location vector.
    TrackedCmdLocationVec& UseLocationsVec()
    {
        return m_locations;
    }

    /// Produces the next location and reports a reference to it through pResult; defined outside this header.
    Pal::Result MakeNext(
        TrackedCmdLocationRef* pResult);

    /// @returns A copy of the reference to the last location.
    const TrackedCmdLocationRef GetLast() const
    {
        return m_lastLocation;
    }

    /// @returns true if location compares equal to the stored "last location" reference.
    bool IsLast(
        TrackedCmdLocationRef const& location) const
    {
        return (m_lastLocation == location);
    }

private:
    TrackedCmdLocationVec m_locations;     // The tracked locations themselves.
    Pal::Platform*        m_pPlatform;     // Platform pointer supplied at construction.
    uint64                m_clientId;      // InvalidClientId after Reset().
    TrackedCmdLocationRef m_lastLocation;  // (this, BadIndex) after Reset().

    // Construction and destruction are private; see CreateTrackedCmdLocationArray() and Destroy().
    TrackedCmdLocationArray(
        Pal::Platform* pPlatform);
    ~TrackedCmdLocationArray() = default;
};
} // namespace CmdDisassembly
} // namespace Pal
@@ -1,70 +1,70 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palDestroyable.h
* @brief Defines the Platform Abstraction Library (PAL) IDestroyable interface.
***********************************************************************************************************************
*/
#pragma once
namespace Pal
{
/**
***********************************************************************************************************************
* @interface IDestroyable
* @brief Interface inherited by objects that must be explicitly destroyed by the client.
*
* This includes all objects except:
*
* + @ref IColorTargetView, @ref IDepthStencilView - These classes are treated as SRDs by the DX12 runtime. Therefore,
* PAL guarantees that no action needs to be taken at Destroy() - the client should just free the memory backing these
* classes.
* + @ref IDevice - These objects are created during IPlatform::EnumerateDevices() and are automatically destroyed
* along with the Platform object.
* + @ref IPrivateScreen - These objects are created during IPlatform::EnumerateDevices() based on
* which screens are attached to each device. They are automatically destroyed along with the Platform object.
***********************************************************************************************************************
*/
class IDestroyable
{
public:
/// Frees all resources associated with this object.
///
/// It is the client's responsibility to only call this method once there are no more existing references to this
/// object. This method does not free the system memory associated with the object (as specified in pPlacementAddr
/// during creation); the client is responsible for freeing that memory since they allocated it.
virtual void Destroy() = 0;
protected:
/// @internal Destructor. Protected to prevent use of the delete operator on this interface. Clients must destroy
/// objects by explicitly calling IDestroyable::Destroy() and are responsible for freeing the system memory
/// allocated for the object on their own.
virtual ~IDestroyable() { }
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palDestroyable.h
* @brief Defines the Platform Abstraction Library (PAL) IDestroyable interface.
***********************************************************************************************************************
*/
#pragma once
namespace Pal
{
/**
***********************************************************************************************************************
* @interface IDestroyable
* @brief Interface inherited by objects that must be explicitly destroyed by the client.
*
* This includes all objects except:
*
* + @ref IColorTargetView, @ref IDepthStencilView - These classes are treated as SRDs by the DX12 runtime. Therefore,
* PAL guarantees that no action needs to be taken at Destroy() - the client should just free the memory backing these
* classes.
* + @ref IDevice - These objects are created during IPlatform::EnumerateDevices() and are automatically destroyed
* along with the Platform object.
* + @ref IPrivateScreen - These objects are created during IPlatform::EnumerateDevices() based on
* which screens are attached to each device. They are automatically destroyed along with the Platform object.
***********************************************************************************************************************
*/
class IDestroyable
{
public:
/// Frees all resources associated with this object.
///
/// It is the client's responsibility to only call this method once there are no more existing references to this
/// object. This method does not free the system memory associated with the object (as specified in pPlacementAddr
/// during creation); the client is responsible for freeing that memory since they allocated it.
virtual void Destroy() = 0;
protected:
/// @internal Destructor. Protected to prevent use of the delete operator on this interface. Clients must destroy
/// objects by explicitly calling IDestroyable::Destroy() and are responsible for freeing the system memory
/// allocated for the object on their own.
virtual ~IDestroyable() { }
};
} // Pal
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,171 +1,171 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palFence.h
* @brief Defines the Platform Abstraction Library (PAL) IFence interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
/// Specifies properties for @ref IFence creation. Input structure to IDevice::CreateFence().
struct FenceCreateInfo
{
union
{
struct
{
uint32 signaled : 1; ///< Specify whether the initial status of the fence is signaled or not.
uint32 eventCanBeInherited : 1; ///< The event handle can be inherited by child process.
uint32 shareable : 1; ///< This fence may be opened for use by a different device.
uint32 reserved : 29; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Fence creation flags.
#if defined(_WIN32)
const wchar_t* pName; ///< The name of the event object. Windows uses this name to uniquely identify fence objects
///< across processes.
#endif
};
/// Specifies properties for fence opening. Input structure to IDevice::OpenFence().
struct FenceOpenInfo
{
union
{
struct
{
uint32 isReference : 1; ///< If set, then the opened fence will reference the same sync object
///< in the kernel. Otherwise, the object is copied to the new Fence.
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Fence open flags.
OsExternalHandle externalFence; ///< External shared fence handle.
#if defined(_WIN32)
const wchar_t* pName; ///< The name of the event object. Windows uses this name to uniquely identify
///< fence objects across processes.
#endif
};
/// Specifies properties for fence exporting. Input structure to IFence::ExportExternalHandle().
struct FenceExportInfo
{
union
{
struct
{
uint32 isReference : 1; ///< If set, then the exported handle references the same sync
///< object in the kernel. Otherwise, the object is copied to the new Fence.
uint32 implicitReset : 1; ///< If set, a fence reset will be done for the sync fd exported.
uint32 reserved : 30; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Fence export flags.
};
/**
***********************************************************************************************************************
* @interface IFence
* @brief Represents a command buffer fence the client can use for coarse-level synchronization between the GPU and
* CPU.
*
* Fences can be specified when calling IQueue::Submit() and will be signaled when certain prior queue operations have
* completed. The status of the fence can be queried by the client to determine when the GPU work of interest has
* completed.
*
* Fences are guaranteed to wait for:
* + Prior command buffer submissions.
* + Prior queue semaphore signals and waits.
* + Prior direct presents.
*
* @see IDevice::CreateFence()
***********************************************************************************************************************
*/
class IFence : public IDestroyable
{
public:
/// Gets the status (completed or not) of the fence.
///
/// @returns Success if the fence has been reached, or NotReady if the fence hasn't been reached. Other return
/// codes indicate an error:
/// + ErrorFenceNeverSubmitted if the fence hasn't been submitted yet and the fence is not created with
/// initialSignaled set to true. NOTE(review): the only matching creation flag visible in this header is
/// FenceCreateInfo::flags.signaled - confirm that this is what "initialSignaled" refers to.
virtual Result GetStatus() const = 0;
/// Export the event handle or sync object handle of the fence for external usage.
/// If @ref FenceExportInfo::isReference is not set, then this also performs an implicit reset operation on
/// the Fence.
///
/// @param [in] exportInfo Information describing how the Fence handle should be exported.
/// @returns the handle in the type OsExternalHandle
virtual OsExternalHandle ExportExternalHandle(
const FenceExportInfo& exportInfo) const = 0;
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data.
void* GetClientData() const
{
return m_pClientData;
}
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData A pointer to arbitrary client data.
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method. The client data pointer starts out as nullptr.
IFence() : m_pClientData(nullptr) {}
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IFence() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palFence.h
* @brief Defines the Platform Abstraction Library (PAL) IFence interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
/// Specifies properties for @ref IFence creation. Input structure to IDevice::CreateFence().
struct FenceCreateInfo
{
union
{
struct
{
uint32 signaled : 1; ///< Specify whether the initial status of the fence is signaled or not.
uint32 eventCanBeInherited : 1; ///< The event handle can be inherited by child process.
uint32 shareable : 1; ///< This fence may be opened for use by a different device.
uint32 reserved : 29; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Fence creation flags.
#if defined(_WIN32)
const wchar_t* pName; ///< The name of the event object. Windows uses this name to uniquely identify fence objects
///< across processes.
#endif
};
/// Specifies properties for fence opening. Input structure to IDevice::OpenFence().
struct FenceOpenInfo
{
union
{
struct
{
uint32 isReference : 1; ///< If set, then the opened fence will reference the same sync object
///< in the kernel. Otherwise, the object is copied to the new Fence.
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Fence open flags.
OsExternalHandle externalFence; ///< External shared fence handle.
#if defined(_WIN32)
const wchar_t* pName; ///< The name of the event object. Windows uses this name to uniquely identify
///< fence objects across processes.
#endif
};
/// Specifies properties for fence exporting. Input structure to IFence::ExportExternalHandle().
struct FenceExportInfo
{
union
{
struct
{
uint32 isReference : 1; ///< If set, then the exported handle references the same sync
///< object in the kernel. Otherwise, the object is copied to the new Fence.
uint32 implicitReset : 1; ///< If set, a fence reset will be done for the sync fd exported.
uint32 reserved : 30; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Fence export flags.
};
/**
***********************************************************************************************************************
* @interface IFence
* @brief Represents a command buffer fence the client can use for coarse-level synchronization between the GPU and
* CPU.
*
* Fences can be specified when calling IQueue::Submit() and will be signaled when certain prior queue operations have
* completed. The status of the fence can be queried by the client to determine when the GPU work of interest has
* completed.
*
* Fences are guaranteed to wait for:
* + Prior command buffer submissions.
* + Prior queue semaphore signals and waits.
* + Prior direct presents.
*
* @see IDevice::CreateFence()
***********************************************************************************************************************
*/
class IFence : public IDestroyable
{
public:
/// Gets the status (completed or not) of the fence.
///
/// @returns Success if the fence has been reached, or NotReady if the fence hasn't been reached. Other return
/// codes indicate an error:
/// + ErrorFenceNeverSubmitted if the fence hasn't been submitted yet and the fence is not created with
/// initialSignaled set to true. NOTE(review): the only matching creation flag visible in this header is
/// FenceCreateInfo::flags.signaled - confirm that this is what "initialSignaled" refers to.
virtual Result GetStatus() const = 0;
/// Export the event handle or sync object handle of the fence for external usage.
/// If @ref FenceExportInfo::isReference is not set, then this also performs an implicit reset operation on
/// the Fence.
///
/// @param [in] exportInfo Information describing how the Fence handle should be exported.
/// @returns the handle in the type OsExternalHandle
virtual OsExternalHandle ExportExternalHandle(
const FenceExportInfo& exportInfo) const = 0;
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data.
void* GetClientData() const
{
return m_pClientData;
}
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData A pointer to arbitrary client data.
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method. The client data pointer starts out as nullptr.
IFence() : m_pClientData(nullptr) {}
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IFence() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};
} // Pal
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,139 +1,139 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palGpuMemoryBindable.h
* @brief Defines the Platform Abstraction Library (PAL) IGpuMemoryBindable interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
// Forward declarations.
class IGpuMemory;
/// Reports required properties of a GPU memory object bound to a specific object. The client must query these
/// properties via IGpuMemoryBindable::GetGpuMemoryRequirements() and bind an @ref IGpuMemory object matching these
/// requirements to the @ref IGpuMemoryBindable object using IGpuMemoryBindable::BindGpuMemory().
///
/// The structure carries a set of requirement flags, the required size and alignment, and an ordered list of the
/// heaps the memory may be placed in.
struct GpuMemoryRequirements
{
// The anonymous bitfield struct and u32All share the same 32 bits of storage: the bitfields give named access to
// individual flags while u32All exposes all of them as a single value.
union
{
struct
{
uint32 cpuAccess : 1; ///< CPU access is required. If set, the client must not set cpuInvisible in
/// GpuMemoryCreateFlags and must provide CPU visible heaps or CPU visible heap
/// access mode. If not set, it's strongly recommended to set cpuInvisible.
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Flags specifying required GPU memory properties.
gpusize size; ///< Amount of GPU memory required, in bytes.
gpusize alignment; ///< Required GPU memory virtual address alignment, in bytes.
uint32 heapCount; ///< Number of valid entries in heaps[]; the array itself has room for GpuHeapCount entries.
GpuHeap heaps[GpuHeapCount]; ///< List of allowed heaps for the GPU memory in order of predicted performance.
};
/**
***********************************************************************************************************************
* @interface IGpuMemoryBindable
* @brief Interface inherited by objects that may require GPU memory be bound to them.
*
* In the future, PAL may discover a need to allocate GPU memory for a class that currently doesn't require it. In that
* situation, that class will be updated to inherit from IGpuMemoryBindable. This change would break backward
* compatibility and would result in the major interface version being incremented.
***********************************************************************************************************************
*/
class IGpuMemoryBindable : public IDestroyable
{
public:
    /// Reports the GPU memory properties this object requires.  The expected flow is to query the properties with
    /// this method, create or sub-allocate a memory range matching them, and then attach that memory to the object
    /// via @ref BindGpuMemory().
    ///
    /// @note Not every object actually needs GPU memory; in that case the reported size and alignment are 0.
    ///
    /// @param [out] pGpuMemReqs  Required properties of the GPU memory to be bound to this object, including
    ///                           properties like size, alignment, and allowed heaps.
    virtual void GetGpuMemoryRequirements(GpuMemoryRequirements* pGpuMemReqs) const = 0;

    /// Binds GPU memory to this object according to the requirements queried via GetGpuMemoryRequirements().
    ///
    /// For objects other than images, binding automatically initializes the object memory as necessary.  Image
    /// objects used as color or depth-stencil targets must instead be explicitly initialized in command buffers
    /// with an ICmdBuffer::CmdReleaseThenAcquire() command that transitions them out of the
    /// LayoutUninitializedTarget usage.
    ///
    /// A new binding automatically replaces any previously bound memory, so there is no need to bind null first in
    /// order to unbind a previous allocation before binding a new one.
    ///
    /// This call is invalid on objects that have no memory requirements, even if binding null.
    ///
    /// @param [in] pGpuMemory  GPU memory to be bound.  If null, the previous binding will be released.
    /// @param [in] offset      Offset into the GPU memory where the object's memory range should begin.  This allows
    ///                         sub-allocating many objects' GPU memory from the same IGpuMemory object.
    ///
    /// @returns Success if the specified GPU memory was successfully bound to the object.  Otherwise, one of the
    ///          following errors may be returned:
    ///          + ErrorUnavailable if binding a non-image to a virtual allocation.
    ///          + ErrorInvalidAlignment if the offset does not match the alignment requirements of the object.
    ///          + ErrorInvalidMemorySize if the object's required memory size does not fit completely within the
    ///            given memory object at the specified offset.
    virtual Result BindGpuMemory(IGpuMemory* pGpuMemory, gpusize offset) = 0;

    /// Reports the GPU memory object and offset this object is bound to, or nullptr and 0 if it is not bound.
    ///
    /// @param [out] ppGpuMemory  Address that receives the bound GPU memory object.  Receives nullptr if this object
    ///                           is not bound to any GPU memory.
    /// @param [out] pOffset      Address that receives the GPU memory offset.  Receives 0 if this object is not
    ///                           bound to any GPU memory.
    ///
    /// @returns Success if the GPU memory and offset were successfully returned.  Otherwise, one of the following
    ///          errors may be returned:
    ///          + ErrorGpuMemoryNotBound if this object is not bound to any GPU memory.
    ///          + ErrorInvalidPointer if either ppGpuMemory or pOffset is nullptr.
    ///          + ErrorUnavailable if binding is not supported in the derived class.
    virtual Result GetGpuMemory(IGpuMemory** ppGpuMemory, gpusize* pOffset) const = 0;

protected:
    /// @internal Destructor.  Protected to prevent use of the delete operator on this interface.  Clients must
    /// destroy objects by explicitly calling IDestroyable::Destroy() and are responsible for freeing the system
    /// memory allocated for the object on their own.
    virtual ~IGpuMemoryBindable() = default;
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palGpuMemoryBindable.h
* @brief Defines the Platform Abstraction Library (PAL) IGpuMemoryBindable interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
// Forward declarations.
class IGpuMemory;
/// Reports required properties of a GPU memory object bound to a specific object. The client must query these
/// properties via IGpuMemoryBindable::GetGpuMemoryRequirements() and bind an @ref IGpuMemory object matching these
/// requirements to the @ref IGpuMemoryBindable object using IGpuMemoryBindable::BindGpuMemory().
///
/// The structure carries a set of requirement flags, the required size and alignment, and an ordered list of the
/// heaps the memory may be placed in.
struct GpuMemoryRequirements
{
// The anonymous bitfield struct and u32All share the same 32 bits of storage: the bitfields give named access to
// individual flags while u32All exposes all of them as a single value.
union
{
struct
{
uint32 cpuAccess : 1; ///< CPU access is required. If set, the client must not set cpuInvisible in
/// GpuMemoryCreateFlags and must provide CPU visible heaps or CPU visible heap
/// access mode. If not set, it's strongly recommended to set cpuInvisible.
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Flags specifying required GPU memory properties.
gpusize size; ///< Amount of GPU memory required, in bytes.
gpusize alignment; ///< Required GPU memory virtual address alignment, in bytes.
uint32 heapCount; ///< Number of valid entries in heaps[]; the array itself has room for GpuHeapCount entries.
GpuHeap heaps[GpuHeapCount]; ///< List of allowed heaps for the GPU memory in order of predicted performance.
};
/**
***********************************************************************************************************************
* @interface IGpuMemoryBindable
* @brief Interface inherited by objects that may require GPU memory be bound to them.
*
* In the future, PAL may discover a need to allocate GPU memory for a class that currently doesn't require it. In that
* situation, that class will be updated to inherit from IGpuMemoryBindable. This change would break backward
* compatibility and would result in the major interface version being incremented.
***********************************************************************************************************************
*/
class IGpuMemoryBindable : public IDestroyable
{
public:
    /// Reports the GPU memory properties this object requires.  The expected flow is to query the properties with
    /// this method, create or sub-allocate a memory range matching them, and then attach that memory to the object
    /// via @ref BindGpuMemory().
    ///
    /// @note Not every object actually needs GPU memory; in that case the reported size and alignment are 0.
    ///
    /// @param [out] pGpuMemReqs  Required properties of the GPU memory to be bound to this object, including
    ///                           properties like size, alignment, and allowed heaps.
    virtual void GetGpuMemoryRequirements(GpuMemoryRequirements* pGpuMemReqs) const = 0;

    /// Binds GPU memory to this object according to the requirements queried via GetGpuMemoryRequirements().
    ///
    /// For objects other than images, binding automatically initializes the object memory as necessary.  Image
    /// objects used as color or depth-stencil targets must instead be explicitly initialized in command buffers
    /// with an ICmdBuffer::CmdReleaseThenAcquire() command that transitions them out of the
    /// LayoutUninitializedTarget usage.
    ///
    /// A new binding automatically replaces any previously bound memory, so there is no need to bind null first in
    /// order to unbind a previous allocation before binding a new one.
    ///
    /// This call is invalid on objects that have no memory requirements, even if binding null.
    ///
    /// @param [in] pGpuMemory  GPU memory to be bound.  If null, the previous binding will be released.
    /// @param [in] offset      Offset into the GPU memory where the object's memory range should begin.  This allows
    ///                         sub-allocating many objects' GPU memory from the same IGpuMemory object.
    ///
    /// @returns Success if the specified GPU memory was successfully bound to the object.  Otherwise, one of the
    ///          following errors may be returned:
    ///          + ErrorUnavailable if binding a non-image to a virtual allocation.
    ///          + ErrorInvalidAlignment if the offset does not match the alignment requirements of the object.
    ///          + ErrorInvalidMemorySize if the object's required memory size does not fit completely within the
    ///            given memory object at the specified offset.
    virtual Result BindGpuMemory(IGpuMemory* pGpuMemory, gpusize offset) = 0;

    /// Reports the GPU memory object and offset this object is bound to, or nullptr and 0 if it is not bound.
    ///
    /// @param [out] ppGpuMemory  Address that receives the bound GPU memory object.  Receives nullptr if this object
    ///                           is not bound to any GPU memory.
    /// @param [out] pOffset      Address that receives the GPU memory offset.  Receives 0 if this object is not
    ///                           bound to any GPU memory.
    ///
    /// @returns Success if the GPU memory and offset were successfully returned.  Otherwise, one of the following
    ///          errors may be returned:
    ///          + ErrorGpuMemoryNotBound if this object is not bound to any GPU memory.
    ///          + ErrorInvalidPointer if either ppGpuMemory or pOffset is nullptr.
    ///          + ErrorUnavailable if binding is not supported in the derived class.
    virtual Result GetGpuMemory(IGpuMemory** ppGpuMemory, gpusize* pOffset) const = 0;

protected:
    /// @internal Destructor.  Protected to prevent use of the delete operator on this interface.  Clients must
    /// destroy objects by explicitly calling IDestroyable::Destroy() and are responsible for freeing the system
    /// memory allocated for the object on their own.
    virtual ~IGpuMemoryBindable() = default;
};
} // Pal
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,187 +1,187 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palMsaaState.h
* @brief Defines the Platform Abstraction Library (PAL) IMsaaState interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
/// Selects the conservative rasterization mode, i.e. how much of a pixel a primitive must cover before fragments
/// are generated for it.
enum class ConservativeRasterizationMode : uint8
{
    Overestimate  = 0,  ///< Fragments will be generated if the primitive area covers any portion of the pixel.
    Underestimate = 1,  ///< Fragments will be generated if all of the pixel is covered by the primitive.
    Count               ///< Number of enumerators listed above.
};
/// Maximum supported number of MSAA color samples.
constexpr uint32 MaxMsaaColorSamples = 16;
/// Maximum supported number of MSAA depth samples.
constexpr uint32 MaxMsaaDepthSamples = 8;
/// Maximum supported number of MSAA fragments.
constexpr uint32 MaxMsaaFragments = 8;
/// Sampling pattern grid size.  This is a quad of pixels, i.e. a 2x2 grid of pixels.
constexpr Extent2d MaxGridSize = { 2, 2 };
/// Number of sub-pixel precision bits.  Sample positions are rounded to 1/Pow2(SubPixelBits).
constexpr uint32 SubPixelBits = 4;
/// Each pixel is subdivided into a Pow2(SubPixelBits) x Pow2(SubPixelBits) grid of possible sample locations.
/// The extent is computed from SubPixelBits rather than spelled out as a literal so that the two constants cannot
/// drift apart; with SubPixelBits = 4 this evaluates to { 16, 16 }.
constexpr Extent2d SubPixelGridSize = { 1u << SubPixelBits, 1u << SubPixelBits };
/// A 2D sample coordinate whose components each lie in [-8/16, 7/16].
struct SampleLocation
{
    int8 x;  ///< X offset.
    int8 y;  ///< Y offset.

    /// Converts this location to an Offset2d, sign-extending both components.
    operator Offset2d() const
    {
        return Offset2d{ x, y };
    }
};
/// Specifies a custom multisample pattern for a pixel quad. There is one array of sample locations per pixel of the
/// quad, and each array holds MaxMsaaRasterizerSamples entries.
// NOTE(review): MaxMsaaRasterizerSamples is not declared in this header; presumably it comes from pal.h - confirm.
struct MsaaQuadSamplePattern
{
SampleLocation topLeft[MaxMsaaRasterizerSamples]; ///< Sample locations for the top-left (TL) pixel of the quad.
SampleLocation topRight[MaxMsaaRasterizerSamples]; ///< Sample locations for the top-right (TR) pixel of the quad.
SampleLocation bottomLeft[MaxMsaaRasterizerSamples]; ///< Sample locations for the bottom-left (BL) pixel of the quad.
SampleLocation bottomRight[MaxMsaaRasterizerSamples]; ///< Sample locations for the bottom-right (BR) pixel of the quad.
};
/// Specifies properties for creation of an @ref IMsaaState object. Input structure to IDevice::CreateMsaaState().
///
/// Most members are sample counts for individual portions of the pipeline; all of them except coverageSamples are
/// documented as bounded by coverageSamples or by the MSAA rate.
struct MsaaStateCreateInfo
{
uint8 coverageSamples; ///< Number of rasterizer samples. Must be greater than or equal to all sample
/// rates in the pipeline. Valid values are 1, 2, 4, 8, and 16.
uint8 exposedSamples; ///< Number of samples exposed in the pixel shader coverage mask. Must be less
/// than or equal to coverageSamples. Valid values are 1, 2, 4, and 8.
uint8 pixelShaderSamples; ///< Controls the pixel shader execution rate. Must be less than or equal to
/// coverageSamples. Valid values are 1, 2, 4, and 8. Note that a value
/// greater than 1 doesn't mean sample rate shading is enabled. Sample rate
/// shading is enabled by either @ref forceSampleRateShading or the pixel shader.
uint8 depthStencilSamples; ///< Number of samples in the bound depth target. Must be less than or equal to
/// coverageSamples. Valid values are 1, 2, 4, and 8.
uint8 shaderExportMaskSamples; ///< Number of samples to use in the shader export mask. Should match the number
/// of color target fragments clamped to
/// @ref DeviceProperties imageProperties.maxMsaaFragments.
uint8 sampleClusters; ///< Number of sample clusters to control over-rasterization (all samples in a
/// cluster are rasterized if any are hit). Must be less than or equal to
/// coverageSamples. Valid values are 1, 2, 4, and 8.
uint8 alphaToCoverageSamples; ///< How many samples of quality to generate with alpha-to-coverage. Must be
/// less than or equal to coverageSamples. Valid values are 1, 2, 4, 8, and 16.
uint8 occlusionQuerySamples; ///< Controls the number of samples to use for occlusion queries.
/// This value must never exceed the MSAA rate.
uint16 sampleMask; ///< Bitmask of which color target and depth/stencil samples should be updated.
/// The lowest bit corresponds to sample 0.
/// Selects overestimate or underestimate conservative rasterization mode. Used only if
/// @ref MsaaStateCreateInfo::flags::enableConservativeRasterization is set to true.
ConservativeRasterizationMode conservativeRasterizationMode;
// The anonymous bitfield struct and u8All share the same 8 bits of storage: the bitfields give named access to
// individual flags while u8All exposes all of them as a single value.
union
{
struct
{
uint8 enableConservativeRasterization : 1; ///< Set to true to enable conservative rasterization.
uint8 enable1xMsaaSampleLocations : 1; ///< Set to true to enable 1xMSAA quad sample pattern.
uint8 disableAlphaToCoverageDither : 1; ///< Disables coverage dithering.
uint8 enableLineStipple : 1; ///< Set to true to enable line stippling.
uint8 forceSampleRateShading : 1; ///< Sample rate shading can be enabled by either the pixel
/// shader, or forced here with forceSampleRateShading = 1.
/// Value 0 means sample rate shading is decided by the pixel shader
/// and value 1 means sample rate shading is forced enabled.
/// This bit is for OpenGL glMinSampleShading, where sample rate
/// shading can be enabled by glEnable(GL_SAMPLE_SHADING)
/// instead of by the pixel shader.
uint8 reserved : 3; ///< Reserved for future use.
};
uint8 u8All; ///< Flags packed as 8-bit uint.
} flags; ///< Flags controlling optional MSAA state behavior.
};
/**
***********************************************************************************************************************
* @interface IMsaaState
* @brief Dynamic state object controlling fixed function MSAA state.
*
* Configures sample counts of various portions of the pipeline, specifies sample positions, etc. The full range of
* EQAA hardware features is exposed.
*
* @see IDevice::CreateMsaaState
***********************************************************************************************************************
*/
class IMsaaState : public IDestroyable
{
public:
    /// Retrieves the arbitrary client data pointer associated with this object.  Clients can use it to attach their
    /// own data to a particular PAL object.
    ///
    /// @returns Pointer to client data.
    void* GetClientData() const { return m_pClientData; }

    /// Stores an arbitrary client data pointer on this object.  Clients can use it to attach their own data to a
    /// particular PAL object.
    ///
    /// @param [in] pClientData  A pointer to arbitrary client data.
    void SetClientData(void* pClientData) { m_pClientData = pClientData; }

protected:
    /// @internal Constructor.  Protected to prevent use of the new operator on this interface; clients must create
    /// objects by explicitly calling the proper create method.  The client data pointer starts out null.
    IMsaaState() = default;

    /// @internal Destructor.  Protected to prevent use of the delete operator on this interface.  Clients must
    /// destroy objects by explicitly calling IDestroyable::Destroy() and are responsible for freeing the system
    /// memory allocated for the object on their own.
    virtual ~IMsaaState() = default;

private:
    /// @internal Client data pointer.  It can hold an arbitrary value, which is returned by GetClientData() and set
    /// via SetClientData().  For non-top-layer objects, this will point to the layer above the current object.
    void* m_pClientData = nullptr;
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palMsaaState.h
* @brief Defines the Platform Abstraction Library (PAL) IMsaaState interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
/// Selects the conservative rasterization mode, i.e. how much of a pixel a primitive must cover before fragments
/// are generated for it.
enum class ConservativeRasterizationMode : uint8
{
    Overestimate  = 0,  ///< Fragments will be generated if the primitive area covers any portion of the pixel.
    Underestimate = 1,  ///< Fragments will be generated if all of the pixel is covered by the primitive.
    Count               ///< Number of enumerators listed above.
};
/// Maximum supported number of MSAA color samples.
constexpr uint32 MaxMsaaColorSamples = 16;
/// Maximum supported number of MSAA depth samples.
constexpr uint32 MaxMsaaDepthSamples = 8;
/// Maximum supported number of MSAA fragments.
constexpr uint32 MaxMsaaFragments = 8;
/// Sampling pattern grid size.  This is a quad of pixels, i.e. a 2x2 grid of pixels.
constexpr Extent2d MaxGridSize = { 2, 2 };
/// Number of sub-pixel precision bits.  Sample positions are rounded to 1/Pow2(SubPixelBits).
constexpr uint32 SubPixelBits = 4;
/// Each pixel is subdivided into a Pow2(SubPixelBits) x Pow2(SubPixelBits) grid of possible sample locations.
/// The extent is computed from SubPixelBits rather than spelled out as a literal so that the two constants cannot
/// drift apart; with SubPixelBits = 4 this evaluates to { 16, 16 }.
constexpr Extent2d SubPixelGridSize = { 1u << SubPixelBits, 1u << SubPixelBits };
/// A 2D sample coordinate whose components each lie in [-8/16, 7/16].
struct SampleLocation
{
    int8 x;  ///< X offset.
    int8 y;  ///< Y offset.

    /// Converts this location to an Offset2d, sign-extending both components.
    operator Offset2d() const
    {
        return Offset2d{ x, y };
    }
};
/// Specifies a custom multisample pattern for a pixel quad. There is one array of sample locations per pixel of the
/// quad, and each array holds MaxMsaaRasterizerSamples entries.
// NOTE(review): MaxMsaaRasterizerSamples is not declared in this header; presumably it comes from pal.h - confirm.
struct MsaaQuadSamplePattern
{
SampleLocation topLeft[MaxMsaaRasterizerSamples]; ///< Sample locations for the top-left (TL) pixel of the quad.
SampleLocation topRight[MaxMsaaRasterizerSamples]; ///< Sample locations for the top-right (TR) pixel of the quad.
SampleLocation bottomLeft[MaxMsaaRasterizerSamples]; ///< Sample locations for the bottom-left (BL) pixel of the quad.
SampleLocation bottomRight[MaxMsaaRasterizerSamples]; ///< Sample locations for the bottom-right (BR) pixel of the quad.
};
/// Specifies properties for creation of an @ref IMsaaState object. Input structure to IDevice::CreateMsaaState().
///
/// Most members are sample counts for individual portions of the pipeline; all of them except coverageSamples are
/// documented as bounded by coverageSamples or by the MSAA rate.
struct MsaaStateCreateInfo
{
uint8 coverageSamples; ///< Number of rasterizer samples. Must be greater than or equal to all sample
/// rates in the pipeline. Valid values are 1, 2, 4, 8, and 16.
uint8 exposedSamples; ///< Number of samples exposed in the pixel shader coverage mask. Must be less
/// than or equal to coverageSamples. Valid values are 1, 2, 4, and 8.
uint8 pixelShaderSamples; ///< Controls the pixel shader execution rate. Must be less than or equal to
/// coverageSamples. Valid values are 1, 2, 4, and 8. Note that a value
/// greater than 1 doesn't mean sample rate shading is enabled. Sample rate
/// shading is enabled by either @ref forceSampleRateShading or the pixel shader.
uint8 depthStencilSamples; ///< Number of samples in the bound depth target. Must be less than or equal to
/// coverageSamples. Valid values are 1, 2, 4, and 8.
uint8 shaderExportMaskSamples; ///< Number of samples to use in the shader export mask. Should match the number
/// of color target fragments clamped to
/// @ref DeviceProperties imageProperties.maxMsaaFragments.
uint8 sampleClusters; ///< Number of sample clusters to control over-rasterization (all samples in a
/// cluster are rasterized if any are hit). Must be less than or equal to
/// coverageSamples. Valid values are 1, 2, 4, and 8.
uint8 alphaToCoverageSamples; ///< How many samples of quality to generate with alpha-to-coverage. Must be
/// less than or equal to coverageSamples. Valid values are 1, 2, 4, 8, and 16.
uint8 occlusionQuerySamples; ///< Controls the number of samples to use for occlusion queries.
/// This value must never exceed the MSAA rate.
uint16 sampleMask; ///< Bitmask of which color target and depth/stencil samples should be updated.
/// The lowest bit corresponds to sample 0.
/// Selects overestimate or underestimate conservative rasterization mode. Used only if
/// @ref MsaaStateCreateInfo::flags::enableConservativeRasterization is set to true.
ConservativeRasterizationMode conservativeRasterizationMode;
// The anonymous bitfield struct and u8All share the same 8 bits of storage: the bitfields give named access to
// individual flags while u8All exposes all of them as a single value.
union
{
struct
{
uint8 enableConservativeRasterization : 1; ///< Set to true to enable conservative rasterization.
uint8 enable1xMsaaSampleLocations : 1; ///< Set to true to enable 1xMSAA quad sample pattern.
uint8 disableAlphaToCoverageDither : 1; ///< Disables coverage dithering.
uint8 enableLineStipple : 1; ///< Set to true to enable line stippling.
uint8 forceSampleRateShading : 1; ///< Sample rate shading can be enabled by either the pixel
/// shader, or forced here with forceSampleRateShading = 1.
/// Value 0 means sample rate shading is decided by the pixel shader
/// and value 1 means sample rate shading is forced enabled.
/// This bit is for OpenGL glMinSampleShading, where sample rate
/// shading can be enabled by glEnable(GL_SAMPLE_SHADING)
/// instead of by the pixel shader.
uint8 reserved : 3; ///< Reserved for future use.
};
uint8 u8All; ///< Flags packed as 8-bit uint.
} flags; ///< Flags controlling optional MSAA state behavior.
};
/**
***********************************************************************************************************************
* @interface IMsaaState
* @brief Dynamic state object controlling fixed function MSAA state.
*
* Configures sample counts of various portions of the pipeline, specifies sample positions, etc. The full range of
* EQAA hardware features is exposed.
*
* @see IDevice::CreateMsaaState
***********************************************************************************************************************
*/
class IMsaaState : public IDestroyable
{
public:
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data.
void* GetClientData() const
{
return m_pClientData;
}
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData A pointer to arbitrary client data.
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// called the proper create method.
IMsaaState() : m_pClientData(nullptr) {}
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IMsaaState() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};
} // Pal
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
@@ -1,234 +1,234 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palQueryPool.h
* @brief Defines the Platform Abstraction Library (PAL) IQueryPool interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palGpuMemoryBindable.h"
namespace Pal
{
/// Identifies the category of a GPU query pool.
enum class QueryPoolType : uint32
{
    Occlusion      = 0, ///< Occlusion query pool.  Supports queries based on the Z test.
    PipelineStats  = 1, ///< Pipeline stats query pool.  Supports queries based on statistics from the GPU's
                        ///  execution such as a count of prims generated, shader invocations, etc.
    StreamoutStats = 2, ///< Streamout query pool.  Supports queries based on statistics from the GPU's execution
                        ///  such as number of primitives written to SO buffer and storage needed.
    Count,              ///< Number of pool types declared above (evaluates to 3).
};
/// Identifies the data a single query slot must produce.  Some query pool types support multiple query types.
enum class QueryType : uint32
{
    Occlusion       = 0, ///< The total passes recorded by the Z test.
    BinaryOcclusion = 1, ///< One if there were one or more Z test passes, zero otherwise.
    PipelineStats   = 2, ///< The total statistics selected by the given pipeline stats query pool.
    StreamoutStats  = 3, ///< SO statistics tracked by CP/VGT including primitives written and storage needed.
    StreamoutStats1 = 4, ///< SO1 statistics tracked by CP/VGT including primitives written and storage needed.
    StreamoutStats2 = 5, ///< SO2 statistics tracked by CP/VGT including primitives written and storage needed.
    StreamoutStats3 = 6, ///< SO3 statistics tracked by CP/VGT including primitives written and storage needed.
    Count,               ///< Number of query types declared above (evaluates to 7).
};
/// Bit flags selecting which statistics a pipeline stats query pool tracks.  Every flag occupies a single bit, so the
/// values can be ORed together into a mask (see QueryPoolCreateInfo::enabledStats).
enum QueryPipelineStatsFlags : uint32
{
    QueryPipelineStatsIaVertices    = (1u << 0),        ///< Input vertices.
    QueryPipelineStatsIaPrimitives  = (1u << 1),        ///< Input primitives.
    QueryPipelineStatsVsInvocations = (1u << 2),        ///< Vertex shader invocations.
    QueryPipelineStatsGsInvocations = (1u << 3),        ///< Geometry shader invocations.
    QueryPipelineStatsGsPrimitives  = (1u << 4),        ///< Geometry shader primitives.
    QueryPipelineStatsCInvocations  = (1u << 5),        ///< Clipper invocations.
    QueryPipelineStatsCPrimitives   = (1u << 6),        ///< Clipper primitives.
    QueryPipelineStatsPsInvocations = (1u << 7),        ///< Pixel shader invocations.
    QueryPipelineStatsHsInvocations = (1u << 8),        ///< Hull shader invocations.
    QueryPipelineStatsDsInvocations = (1u << 9),        ///< Domain shader invocations.
    QueryPipelineStatsCsInvocations = (1u << 10),       ///< Compute shader invocations.
    QueryPipelineStatsTsInvocations = (1u << 11),       ///< Task shader invocations.
    QueryPipelineStatsMsInvocations = (1u << 12),       ///< Mesh shader invocations.
    QueryPipelineStatsMsPrimitives  = (1u << 13),       ///< Mesh shader primitives.
    QueryPipelineStatsAll           = (1u << 14) - 1u   ///< All of the above stats (bits 0 through 13, i.e. 0x3FFF).
};
/// Specifies properties for @ref IQueryPool creation. Input structure to IDevice::CreateQueryPool().
struct QueryPoolCreateInfo
{
QueryPoolType queryPoolType; ///< Type of query pool to create (e.g., occlusion, pipeline stats or streamout stats).
uint32 numSlots; ///< Number of slots in the query pool.
uint32 enabledStats; ///< An ORed mask of stats flags specific to the query pool type.
/// @see QueryPipelineStatsFlags for PipelineStats query pools.
// The anonymous bitfield struct and u32All alias the same 32 bits (1 flag bit + 31 reserved bits), so u32All can be
// used to read or write every flag at once.
union
{
struct
{
/// If true, this query pool can have results retrieved using the CPU (using @ref IQueryPool::GetResults)
/// and can be reset using the CPU (using @ref IQueryPool::Reset). Otherwise, the client must use command
/// buffers to perform these operations (using @ref ICmdBuffer::CmdResetQueryPool and
/// @ref ICmdBuffer::CmdResolveQuery).
uint32 enableCpuAccess : 1;
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed together as a uint32.
} flags; ///< Flags controlling QueryPool behavior.
};
/// Bit flags controlling operations that compute query results.  Every non-default flag occupies a single bit, so the
/// values can be ORed together.
enum QueryResultFlags : uint32
{
    QueryResultDefault          = 0u,               ///< Default to 32-bit results with no waiting.
    QueryResult64Bit            = (1u << 0),        ///< Store all results as 64-bit values.
    QueryResultWait             = (1u << 1),        ///< Wait for the queries to finish when computing the results.
    QueryResultAvailability     = (1u << 2),        ///< If the results of a query are available at computation time
                                                    ///  a one will be written as a separate value after the result
                                                    ///  value, if the results were not available a zero will be
                                                    ///  written.
    QueryResultPartial          = (1u << 3),        ///< If the final result of a query would be unavailable, then
                                                    ///  return a result for that query between 0 and what the final
                                                    ///  result would be.
    QueryResultAccumulate       = (1u << 4),        ///< Results are added to the values present in the destination,
                                                    ///  if availability data is enabled it will be ANDed with the
                                                    ///  present availability data.
    QueryResultPreferShaderPath = (1u << 5),        ///< Prefer a shader resolve path over a command processor path.
    QueryResultOnlyPrimNeeded   = (1u << 6),        ///< Select only primitives storage needed in Streamout query
                                                    ///  results.
    QueryResultAll              = (1u << 7) - 1u    ///< Clients should NOT use it, for internal static_assert
                                                    ///  purpose only.  Mask of bits 0 through 6 (0x7F).
};
/**
***********************************************************************************************************************
* @interface IQueryPool
* @brief Represents a set of queries that can be used to retrieve detailed info about the GPU's execution of a
* particular range of a command buffer.
*
* The available pool categories are enumerated by @ref QueryPoolType (occlusion, pipeline statistics and streamout
* statistics). All queries in a pool are the same type.
*
* @see IDevice::CreateQueryPool()
***********************************************************************************************************************
*/
class IQueryPool : public IGpuMemoryBindable
{
public:
/// Retrieves query results from a query pool.
///
/// Multiple consecutive query results can be retrieved with one call.
///
/// @param [in] flags Flags that control the result data layout and how the results are retrieved.
/// @param [in] queryType Specifies what data the query slots must produce.
/// @param [in] startQuery First query pool slot to retrieve data for.
/// @param [in] queryCount Number of query pool slots to retrieve data for.
/// @param [in] pMappedGpuAddr Specify the query buffer mapped address. If the parameter equals nullptr,
/// this method will use Map/Unmap to access the data.
/// @param [in,out] pDataSize Input value specifies the available size in pData in bytes; output value reports the
/// number of bytes required to hold all result data.
/// @param [out] pData Location where the query results should be written. Can be null in order to query the
/// required size. The data returned depends on the query pool type and flags. All data
/// entries are either uint32 or uint64 integers. One or more type-specific entries will
/// be optionally followed by one entry for availability. The type-specific data is:<br>
/// + QueryOcclusion: One entry to store the zPass count.
/// + QueryPipelineStats: One entry per statistic enabled in the create info. The stats
/// will be written in the appropriate order for each PAL client.
/// @param [in] stride Stride in bytes between subsequent query result data or zero to request tightly
/// packed result data.
///
/// @returns Success if query results were successfully returned in pData, or NotReady if any of the requested query
/// slots does not yet have results available. Otherwise, one of the following error codes may be
/// returned:
/// + ErrorInvalidValue if the range defined by startQuery and queryCount is not valid for this query pool.
/// + ErrorGpuMemoryNotBound if the query pool requires GPU memory but none is bound.
/// + ErrorInvalidMemorySize if pData is non-null and the value stored in pDataSize is too small.
virtual Result GetResults(
QueryResultFlags flags,
QueryType queryType,
uint32 startQuery,
uint32 queryCount,
const void* pMappedGpuAddr,
size_t* pDataSize,
void* pData,
size_t stride) = 0;
/// Use CPU to reset the query pool slots.
///
/// Supported for occlusion and video decode statistics query pools.
/// NOTE(review): @ref QueryPoolType as declared in this header has no video decode statistics enumerator, so the
/// mention of video decode pools looks stale - confirm which pool types actually support a CPU reset.
///
/// @param [in] startQuery First query pool slot to reset.
/// @param [in] queryCount Number of query pool slots to reset.
/// @param [in] pMappedCpuAddr Specify the query buffer mapped address. If the parameter equals nullptr,
/// this method will use Map/Unmap to access the data.
///
/// @returns Success if the reset was successfully performed.
virtual Result Reset(
uint32 startQuery,
uint32 queryCount,
void* pMappedCpuAddr) = 0;
/// Returns the distance, in bytes, between successive query slots in the bound GPU memory.
/// This method is only supported for QueryPoolType::VideoDecodeStats.
/// NOTE(review): no VideoDecodeStats enumerator is declared by QueryPoolType in this header, so the restriction
/// above appears stale - confirm for which pool types this method is meaningful.
///
/// @returns the distance, in bytes, between successive query slots in the bound GPU memory.
virtual gpusize GetQuerySlotStride() const = 0;
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data. This is nullptr until SetClientData() has been called, because the constructor
/// initializes the pointer to nullptr.
void* GetClientData() const
{
return m_pClientData;
}
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// The pointer is stored as-is; this interface class never dereferences it.
///
/// @param [in] pClientData A pointer to arbitrary client data.
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method.
IQueryPool() : m_pClientData(nullptr) {}
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IQueryPool() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palQueryPool.h
* @brief Defines the Platform Abstraction Library (PAL) IQueryPool interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palGpuMemoryBindable.h"
namespace Pal
{
/// Identifies the category of a GPU query pool.
enum class QueryPoolType : uint32
{
    Occlusion      = 0, ///< Occlusion query pool.  Supports queries based on the Z test.
    PipelineStats  = 1, ///< Pipeline stats query pool.  Supports queries based on statistics from the GPU's
                        ///  execution such as a count of prims generated, shader invocations, etc.
    StreamoutStats = 2, ///< Streamout query pool.  Supports queries based on statistics from the GPU's execution
                        ///  such as number of primitives written to SO buffer and storage needed.
    Count,              ///< Number of pool types declared above (evaluates to 3).
};
/// Identifies the data a single query slot must produce.  Some query pool types support multiple query types.
enum class QueryType : uint32
{
    Occlusion       = 0, ///< The total passes recorded by the Z test.
    BinaryOcclusion = 1, ///< One if there were one or more Z test passes, zero otherwise.
    PipelineStats   = 2, ///< The total statistics selected by the given pipeline stats query pool.
    StreamoutStats  = 3, ///< SO statistics tracked by CP/VGT including primitives written and storage needed.
    StreamoutStats1 = 4, ///< SO1 statistics tracked by CP/VGT including primitives written and storage needed.
    StreamoutStats2 = 5, ///< SO2 statistics tracked by CP/VGT including primitives written and storage needed.
    StreamoutStats3 = 6, ///< SO3 statistics tracked by CP/VGT including primitives written and storage needed.
    Count,               ///< Number of query types declared above (evaluates to 7).
};
/// Bit flags selecting which statistics a pipeline stats query pool tracks.  Every flag occupies a single bit, so the
/// values can be ORed together into a mask (see QueryPoolCreateInfo::enabledStats).
enum QueryPipelineStatsFlags : uint32
{
    QueryPipelineStatsIaVertices    = (1u << 0),        ///< Input vertices.
    QueryPipelineStatsIaPrimitives  = (1u << 1),        ///< Input primitives.
    QueryPipelineStatsVsInvocations = (1u << 2),        ///< Vertex shader invocations.
    QueryPipelineStatsGsInvocations = (1u << 3),        ///< Geometry shader invocations.
    QueryPipelineStatsGsPrimitives  = (1u << 4),        ///< Geometry shader primitives.
    QueryPipelineStatsCInvocations  = (1u << 5),        ///< Clipper invocations.
    QueryPipelineStatsCPrimitives   = (1u << 6),        ///< Clipper primitives.
    QueryPipelineStatsPsInvocations = (1u << 7),        ///< Pixel shader invocations.
    QueryPipelineStatsHsInvocations = (1u << 8),        ///< Hull shader invocations.
    QueryPipelineStatsDsInvocations = (1u << 9),        ///< Domain shader invocations.
    QueryPipelineStatsCsInvocations = (1u << 10),       ///< Compute shader invocations.
    QueryPipelineStatsTsInvocations = (1u << 11),       ///< Task shader invocations.
    QueryPipelineStatsMsInvocations = (1u << 12),       ///< Mesh shader invocations.
    QueryPipelineStatsMsPrimitives  = (1u << 13),       ///< Mesh shader primitives.
    QueryPipelineStatsAll           = (1u << 14) - 1u   ///< All of the above stats (bits 0 through 13, i.e. 0x3FFF).
};
/// Specifies properties for @ref IQueryPool creation. Input structure to IDevice::CreateQueryPool().
struct QueryPoolCreateInfo
{
QueryPoolType queryPoolType; ///< Type of query pool to create (e.g., occlusion, pipeline stats or streamout stats).
uint32 numSlots; ///< Number of slots in the query pool.
uint32 enabledStats; ///< An ORed mask of stats flags specific to the query pool type.
/// @see QueryPipelineStatsFlags for PipelineStats query pools.
// The anonymous bitfield struct and u32All alias the same 32 bits (1 flag bit + 31 reserved bits), so u32All can be
// used to read or write every flag at once.
union
{
struct
{
/// If true, this query pool can have results retrieved using the CPU (using @ref IQueryPool::GetResults)
/// and can be reset using the CPU (using @ref IQueryPool::Reset). Otherwise, the client must use command
/// buffers to perform these operations (using @ref ICmdBuffer::CmdResetQueryPool and
/// @ref ICmdBuffer::CmdResolveQuery).
uint32 enableCpuAccess : 1;
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed together as a uint32.
} flags; ///< Flags controlling QueryPool behavior.
};
/// Bit flags controlling operations that compute query results.  Every non-default flag occupies a single bit, so the
/// values can be ORed together.
enum QueryResultFlags : uint32
{
    QueryResultDefault          = 0u,               ///< Default to 32-bit results with no waiting.
    QueryResult64Bit            = (1u << 0),        ///< Store all results as 64-bit values.
    QueryResultWait             = (1u << 1),        ///< Wait for the queries to finish when computing the results.
    QueryResultAvailability     = (1u << 2),        ///< If the results of a query are available at computation time
                                                    ///  a one will be written as a separate value after the result
                                                    ///  value, if the results were not available a zero will be
                                                    ///  written.
    QueryResultPartial          = (1u << 3),        ///< If the final result of a query would be unavailable, then
                                                    ///  return a result for that query between 0 and what the final
                                                    ///  result would be.
    QueryResultAccumulate       = (1u << 4),        ///< Results are added to the values present in the destination,
                                                    ///  if availability data is enabled it will be ANDed with the
                                                    ///  present availability data.
    QueryResultPreferShaderPath = (1u << 5),        ///< Prefer a shader resolve path over a command processor path.
    QueryResultOnlyPrimNeeded   = (1u << 6),        ///< Select only primitives storage needed in Streamout query
                                                    ///  results.
    QueryResultAll              = (1u << 7) - 1u    ///< Clients should NOT use it, for internal static_assert
                                                    ///  purpose only.  Mask of bits 0 through 6 (0x7F).
};
/**
***********************************************************************************************************************
* @interface IQueryPool
* @brief Represents a set of queries that can be used to retrieve detailed info about the GPU's execution of a
* particular range of a command buffer.
*
* The available pool categories are enumerated by @ref QueryPoolType (occlusion, pipeline statistics and streamout
* statistics). All queries in a pool are the same type.
*
* @see IDevice::CreateQueryPool()
***********************************************************************************************************************
*/
class IQueryPool : public IGpuMemoryBindable
{
public:
/// Retrieves query results from a query pool.
///
/// Multiple consecutive query results can be retrieved with one call.
///
/// @param [in] flags Flags that control the result data layout and how the results are retrieved.
/// @param [in] queryType Specifies what data the query slots must produce.
/// @param [in] startQuery First query pool slot to retrieve data for.
/// @param [in] queryCount Number of query pool slots to retrieve data for.
/// @param [in] pMappedGpuAddr Specify the query buffer mapped address. If the parameter equals nullptr,
/// this method will use Map/Unmap to access the data.
/// @param [in,out] pDataSize Input value specifies the available size in pData in bytes; output value reports the
/// number of bytes required to hold all result data.
/// @param [out] pData Location where the query results should be written. Can be null in order to query the
/// required size. The data returned depends on the query pool type and flags. All data
/// entries are either uint32 or uint64 integers. One or more type-specific entries will
/// be optionally followed by one entry for availability. The type-specific data is:<br>
/// + QueryOcclusion: One entry to store the zPass count.
/// + QueryPipelineStats: One entry per statistic enabled in the create info. The stats
/// will be written in the appropriate order for each PAL client.
/// @param [in] stride Stride in bytes between subsequent query result data or zero to request tightly
/// packed result data.
///
/// @returns Success if query results were successfully returned in pData, or NotReady if any of the requested query
/// slots does not yet have results available. Otherwise, one of the following error codes may be
/// returned:
/// + ErrorInvalidValue if the range defined by startQuery and queryCount is not valid for this query pool.
/// + ErrorGpuMemoryNotBound if the query pool requires GPU memory but none is bound.
/// + ErrorInvalidMemorySize if pData is non-null and the value stored in pDataSize is too small.
virtual Result GetResults(
QueryResultFlags flags,
QueryType queryType,
uint32 startQuery,
uint32 queryCount,
const void* pMappedGpuAddr,
size_t* pDataSize,
void* pData,
size_t stride) = 0;
/// Use CPU to reset the query pool slots.
///
/// Supported for occlusion and video decode statistics query pools.
/// NOTE(review): @ref QueryPoolType as declared in this header has no video decode statistics enumerator, so the
/// mention of video decode pools looks stale - confirm which pool types actually support a CPU reset.
///
/// @param [in] startQuery First query pool slot to reset.
/// @param [in] queryCount Number of query pool slots to reset.
/// @param [in] pMappedCpuAddr Specify the query buffer mapped address. If the parameter equals nullptr,
/// this method will use Map/Unmap to access the data.
///
/// @returns Success if the reset was successfully performed.
virtual Result Reset(
uint32 startQuery,
uint32 queryCount,
void* pMappedCpuAddr) = 0;
/// Returns the distance, in bytes, between successive query slots in the bound GPU memory.
/// This method is only supported for QueryPoolType::VideoDecodeStats.
/// NOTE(review): no VideoDecodeStats enumerator is declared by QueryPoolType in this header, so the restriction
/// above appears stale - confirm for which pool types this method is meaningful.
///
/// @returns the distance, in bytes, between successive query slots in the bound GPU memory.
virtual gpusize GetQuerySlotStride() const = 0;
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data. This is nullptr until SetClientData() has been called, because the constructor
/// initializes the pointer to nullptr.
void* GetClientData() const
{
return m_pClientData;
}
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// The pointer is stored as-is; this interface class never dereferences it.
///
/// @param [in] pClientData A pointer to arbitrary client data.
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method.
IQueryPool() : m_pClientData(nullptr) {}
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IQueryPool() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};
} // Pal
File diff suppressed because it is too large Load Diff
@@ -1,275 +1,275 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palQueueSemaphore.h
* @brief Defines the Platform Abstraction Library (PAL) IQueueSemaphore interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
#include <chrono>
#if defined(_WIN32)
struct _SECURITY_ATTRIBUTES;
#endif
namespace Pal
{
// Forward declarations.
class IQueueSemaphore;
/// Specifies properties for @ref IQueueSemaphore creation. Input structure to IDevice::CreateQueueSemaphore().
///
/// The creation flags are exposed both as individual bitfields and as the packed 32-bit value flags.u32All; the seven
/// named flag bits plus the 25 reserved bits fill exactly 32 bits.
struct QueueSemaphoreCreateInfo
{
union
{
struct
{
/// This queue semaphore may be opened for use by a different device.
/// For DX12 native fence, the flag needs to be consistent with D3DDDI_SYNCHRONIZATIONOBJECT_FLAGS.Shared
/// given by DX runtime.
uint32 shareable : 1;
/// This queue semaphore can only be shared through an NT handle.
/// For DX12 native fence, the flag needs to be consistent with
/// D3DDDI_SYNCHRONIZATIONOBJECT_FLAGS.NtSecuritySharing given by DX runtime.
uint32 sharedViaNtHandle : 1;
uint32 externalOpened : 1; ///< Semaphore was created by other APIs
/// This queue semaphore is a timeline semaphore. Timeline semaphores have a 64-bit unsigned integer payload
/// which gets monotonically increased with each Signal operation. A wait on a timeline semaphore blocks the
/// waiter until the specified payload value has been signaled.
/// For DX12 native fence, runtime determines initialCount. Therefore, timeline flag has to be set.
uint32 timeline : 1;
/// Do not signal the queue semaphore to max if the device is lost.
/// For DX12 native fence, the flag needs to be consistent with
/// D3DDDI_SYNCHRONIZATIONOBJECT_FLAGS.NoSignalMaxValueOnTdr given by DX runtime.
uint32 noSignalOnDeviceLost : 1;
/// For native fence only. If it's 0x0, the native fence type is D3DDDI_NATIVEFENCE_TYPE_DEFAULT.
/// If it's 0x1, native fence type is D3DDDI_NATIVEFENCE_TYPE_INTRA_GPU.
/// For DX12, the value is determined by runtime. DXCP needs to set it by reading D3DDDI_NATIVEFENCEINFO.
uint32 gpuOnly : 1;
/// This queue semaphore will be a monitored fence if this flag is set, even if the OS supports native fence.
uint32 forceUseMonitoredFence : 1;
uint32 reserved : 25; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Queue semaphore creation flags.
uint32 maxCount; ///< The maximum signal count; once reached, further signals are dropped. Must be
/// non-zero and no more than maxSemaphoreCount in @ref DeviceProperties. For
/// example, a value of one would request a binary semaphore.
/// NOTE: maxCount does not apply to timeline semaphores.
uint64 initialCount; ///< Initial value for timeline semaphores. (or)
/// Initial count value for counting semaphores.
/// Must not be larger than maxCount for counting semaphores.
/// For DX12 native fence, DXCP needs to pass InitialFenceValue from
/// D3DDDI_NATIVEFENCEINFO.
};
/// Specifies parameters for opening a queue semaphore for use on another device. Input structure to
/// IDevice::OpenSharedQueueSemaphore().
struct QueueSemaphoreOpenInfo
{
/// Shared queue semaphore object from another device to be opened.
/// NOTE(review): presumably this must have been created with QueueSemaphoreCreateInfo::flags.shareable set, since
/// that flag is documented as allowing a semaphore to be opened by a different device - confirm.
IQueueSemaphore* pSharedQueueSemaphore;
};
/// Specifies parameters for opening a queue semaphore created by other APIs such as D3D.
///
/// The open flags are exposed both as individual bitfields and as the packed 32-bit value flags.u32All; the four
/// named flag bits plus the 28 reserved bits fill exactly 32 bits.
struct ExternalQueueSemaphoreOpenInfo
{
union
{
struct
{
uint32 crossProcess : 1; ///< This semaphore is created in another process.
uint32 sharedViaNtHandle : 1; ///< The shared semaphore handle is an NT handle.
uint32 isReference : 1; ///< If set, then the opened semaphore will reference the same sync
///< object in the kernel. Otherwise, the object is copied to the
///< new Semaphore.
/// This queue semaphore is a timeline semaphore. Timeline semaphores have a 64-bit unsigned integer payload
/// which gets monotonically increased with each Signal operation. A wait on a timeline semaphore blocks the
/// waiter until the specified payload value has been signaled.
uint32 timeline : 1;
uint32 reserved : 28; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< External queue semaphore open flags.
OsExternalHandle externalSemaphore; ///< External shared semaphore handle.
// The member below only exists for clients built against an interface major version older than 882, and only on
// unix KMT builds; the struct layout therefore differs between build configurations.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 882
#if defined(__unix__) && PAL_KMT_BUILD
uint64 syncFdSignalValue; ///< Signal timeline value when importing the state of a sync file
#endif
#endif
};
/// Specifies parameters for exporting a queue semaphore. Input structure to IQueueSemaphore::ExportExternalHandle().
///
/// All members after the flags union exist only in KMT builds (PAL_KMT_BUILD), so the struct layout differs between
/// build configurations.
struct QueueSemaphoreExportInfo
{
union
{
struct
{
uint32 isReference : 1; ///< If set, then the semaphore exports a handle that references the
///< same sync object in the kernel. Otherwise, the object is copied
///< to the new Semaphore.
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< External queue semaphore export flags.
#if PAL_KMT_BUILD
// NOTE(review): _SECURITY_ATTRIBUTES is only forward-declared by this header when _WIN32 is defined, yet this block
// also contains a __unix__ branch; presumably another header provides the declaration for unix KMT builds - confirm.
const _SECURITY_ATTRIBUTES* pSecurityAttributes; ///< It specifies the security descriptor and the inheritable
/// attribute.
const wchar_t* pNtObjectName; ///< A name for the NT handle. If the object is exported as an NT
/// handle with a name, then the handle can be acquired
/// via this name.
uint32 accessFlags; ///< Desired access rights of GPU memory.
/// NOTE(review): "GPU memory" looks copied from a memory export
/// struct; presumably these are the access rights for the exported
/// semaphore handle - confirm.
#if defined(__unix__)
uint64 syncFdWaitValue; ///< Wait timeline value when exporting the state of a sync file
#endif
#endif
};
/**
***********************************************************************************************************************
* @interface IQueueSemaphore
* @brief Semaphore object used to synchronize GPU work performed by multiple, parallel queues.
*
* These semaphores are used by calling IQueue::SignalQueueSemaphore() and IQueue::WaitQueueSemaphore().
*
* @see IDevice::CreateQueueSemaphore()
* @see IDevice::OpenSharedQueueSemaphore()
***********************************************************************************************************************
*/
class IQueueSemaphore : public IDestroyable
{
public:
/// An IQueue::WaitQueueSemaphore operation may need to be sent down to the OS after the corresponding
/// IQueue::SignalQueueSemaphore operation due to GPU scheduler limitations. This method checks if any queues have
/// batched-up commands waiting for a SignalQueueSemaphore operation to appear.
///
/// @returns True if one or more queues have some number of commands batched-up waiting for other queues to signal
/// this semaphore. False otherwise.
virtual bool HasStalledQueues() = 0;
/// Queries the current payload of a timeline semaphore.
///
/// @param [out] pValue Receives the payload returned by the query.
///
/// @returns Success if the timeline semaphore was queried successfully. Otherwise, one of the following errors may
/// be returned:
/// + ErrorInvalidValue if an unexpected conversion error occurs.
/// + ErrorInvalidObjectType if the semaphore is not a timeline semaphore.
virtual Result QuerySemaphoreValue(
uint64* pValue) = 0;
/// Waits on a timeline semaphore point. To be clear, this is a CPU-side wait.
///
/// @param [in] value The timeline point to wait on.
/// @param [in] timeout The maximum time to wait, expressed in nanoseconds.
///
/// @returns Success if the timeline semaphore point was waited on successfully. Otherwise, one of the following
/// errors may be returned:
/// + ErrorInvalidValue if an unexpected conversion error occurs.
/// + ErrorInvalidObjectType if the semaphore is not a timeline semaphore.
virtual Result WaitSemaphoreValue(
uint64 value,
std::chrono::nanoseconds timeout) = 0;
/// Signals a timeline semaphore point. To be clear, this is a CPU-side signal.
///
/// @param [in] value The timeline point to signal.
///
/// @returns Success if the timeline semaphore point was signaled successfully. Otherwise, one of the following
/// errors may be returned:
/// + ErrorInvalidValue if an unexpected conversion error occurs.
/// + ErrorInvalidObjectType if the semaphore is not a timeline semaphore.
virtual Result SignalSemaphoreValue(
uint64 value) = 0;
#if PAL_KMT_BUILD || PAL_AMDGPU_BUILD
/// Returns an OS-specific handle which can be used to refer to this semaphore object across processes. This will
/// return a null or invalid handle if the object was not created with the external create flag set.
///
/// @param [in] exportInfo Information describing how the semaphore handle should be exported.
/// @note This function is only declared when PAL_KMT_BUILD or PAL_AMDGPU_BUILD is enabled.
/// NOTE(review): this note previously read "only available for Linux builds", which does not match the
/// preprocessor guard above - confirm the intended platforms.
///
/// @returns An OS-specific handle which can be used to access the semaphore object across processes.
virtual OsExternalHandle ExportExternalHandle(
const QueueSemaphoreExportInfo& exportInfo) const = 0;
#endif
#if defined(_WIN32)
/// Returns an OS-specific handle which can be used by another device to access the semaphore object.
/// Only declared when _WIN32 is defined.
///
/// @returns An OS-specific handle which can be used by another device to access the semaphore object.
virtual OsExternalHandle ExportKmtHandle() const = 0;
#endif
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data.
void* GetClientData() const
{
return m_pClientData;
}
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData A pointer to arbitrary client data.
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method. The client data pointer starts out as nullptr.
IQueueSemaphore() : m_pClientData(nullptr) {}
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IQueueSemaphore() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palQueueSemaphore.h
* @brief Defines the Platform Abstraction Library (PAL) IQueueSemaphore interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
#include <chrono>
#if defined(_WIN32)
struct _SECURITY_ATTRIBUTES;
#endif
namespace Pal
{
// Forward declarations.
class IQueueSemaphore;
/// Specifies properties for @ref IQueueSemaphore creation. Input structure to IDevice::CreateQueueSemaphore().
struct QueueSemaphoreCreateInfo
{
union
{
struct
{
/// This queue semaphore may be opened for use by a different device.
/// For DX12 native fence, the flag needs to be consistent with D3DDDI_SYNCHRONIZATIONOBJECT_FLAGS.Shared
/// given by DX runtime.
uint32 shareable : 1;
/// This queue semaphore can only be shared through an NT handle.
/// For DX12 native fence, the flag needs to be consistent with
/// D3DDDI_SYNCHRONIZATIONOBJECT_FLAGS.NtSecuritySharing given by DX runtime.
uint32 sharedViaNtHandle : 1;
uint32 externalOpened : 1; ///< Semaphore was created by other APIs.
/// This queue semaphore is a timeline semaphore. Timeline semaphores have a 64-bit unsigned integer payload
/// which gets monotonically increased with each Signal operation. A wait on a timeline semaphore blocks the
/// waiter until the specified payload value has been signaled.
/// For DX12 native fence, runtime determines initialCount. Therefore, timeline flag has to be set.
uint32 timeline : 1;
/// Do not signal the queue semaphore to max if the device is lost.
/// For DX12 native fence, the flag needs to be consistent with
/// D3DDDI_SYNCHRONIZATIONOBJECT_FLAGS.NoSignalMaxValueOnTdr given by DX runtime.
uint32 noSignalOnDeviceLost : 1;
/// For native fence only. If it's 0x0, the native fence type is D3DDDI_NATIVEFENCE_TYPE_DEFAULT.
/// If it's 0x1, native fence type is D3DDDI_NATIVEFENCE_TYPE_INTRA_GPU.
/// For DX12, the value is determined by runtime. DXCP needs to set it by reading D3DDDI_NATIVEFENCEINFO.
uint32 gpuOnly : 1;
/// This queue semaphore will be a monitored fence if this flag is set, even if the OS supports native fence.
uint32 forceUseMonitoredFence : 1;
uint32 reserved : 25; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Queue semaphore creation flags.
uint32 maxCount; ///< The maximum signal count; once reached, further signals are dropped. Must be
/// non-zero and no more than maxSemaphoreCount in @ref DeviceProperties. For
/// example, a value of one would request a binary semaphore.
/// NOTE: maxCount does not apply to timeline semaphores.
uint64 initialCount; ///< Initial value for timeline semaphores, or the initial count value for counting
/// semaphores.
/// Must not be larger than maxCount for counting semaphores.
/// For DX12 native fence, DXCP needs to pass InitialFenceValue from
/// D3DDDI_NATIVEFENCEINFO.
};
/// Specifies parameters for opening a queue semaphore for use on another device. Input structure to
/// IDevice::OpenSharedQueueSemaphore().
struct QueueSemaphoreOpenInfo
{
/// Shared queue semaphore object, belonging to another device, that is to be opened.
IQueueSemaphore* pSharedQueueSemaphore;
};
/// Specifies parameters for opening a queue semaphore created by other APIs such as D3D.
struct ExternalQueueSemaphoreOpenInfo
{
union
{
struct
{
uint32 crossProcess : 1; ///< This semaphore was created in another process.
uint32 sharedViaNtHandle : 1; ///< The shared semaphore handle is an NT handle.
uint32 isReference : 1; ///< If set, then the opened semaphore will reference the same sync
///< object in the kernel. Otherwise, the object is copied to the
///< new Semaphore.
/// This queue semaphore is a timeline semaphore. Timeline semaphores have a 64-bit unsigned integer payload
/// which gets monotonically increased with each Signal operation. A wait on a timeline semaphore blocks the
/// waiter until the specified payload value has been signaled.
uint32 timeline : 1;
uint32 reserved : 28; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< External queue semaphore open flags.
OsExternalHandle externalSemaphore; ///< External shared semaphore handle.
// The member below only exists for client interface versions older than 882, and only when __unix__ is defined
// together with PAL_KMT_BUILD.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 882
#if defined(__unix__) && PAL_KMT_BUILD
uint64 syncFdSignalValue; ///< Signal timeline value when importing the state of a sync file.
#endif
#endif
};
/// Specifies parameters for exporting a queue semaphore. Input structure to IQueueSemaphore::ExportExternalHandle().
struct QueueSemaphoreExportInfo
{
union
{
struct
{
uint32 isReference : 1; ///< If set, then the exported handle references the same sync object
///< in the kernel. Otherwise, the object is copied to the new
///< Semaphore.
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< External queue semaphore export flags.
// The members below are only present when PAL_KMT_BUILD is enabled.
#if PAL_KMT_BUILD
const _SECURITY_ATTRIBUTES* pSecurityAttributes; ///< Specifies the security descriptor and the inheritable
/// attribute.
const wchar_t* pNtObjectName; ///< Name for the NT handle. If the object is exported as an NT
/// handle with a name, the handle can then be acquired
/// via this name.
uint32 accessFlags; ///< Desired access rights of GPU memory.
/// NOTE(review): wording mentions GPU memory although this
/// struct describes a semaphore export - confirm.
#if defined(__unix__)
uint64 syncFdWaitValue; ///< Wait timeline value when exporting the state of a sync file.
#endif
#endif
};
/**
***********************************************************************************************************************
* @interface IQueueSemaphore
* @brief Semaphore object used to synchronize GPU work performed by multiple, parallel queues.
*
* These semaphores are used by calling IQueue::SignalQueueSemaphore() and IQueue::WaitQueueSemaphore().
*
* @see IDevice::CreateQueueSemaphore()
* @see IDevice::OpenSharedQueueSemaphore()
***********************************************************************************************************************
*/
class IQueueSemaphore : public IDestroyable
{
public:
/// An IQueue::WaitQueueSemaphore operation may need to be sent down to the OS after the corresponding
/// IQueue::SignalQueueSemaphore operation due to GPU scheduler limitations. This method checks if any queues have
/// batched-up commands waiting for a SignalQueueSemaphore operation to appear.
///
/// @returns True if one or more queues have some number of commands batched-up waiting for other queues to signal
/// this semaphore. False otherwise.
virtual bool HasStalledQueues() = 0;
/// Queries the current payload of a timeline semaphore.
///
/// @param [out] pValue Receives the payload returned by the query.
///
/// @returns Success if the timeline semaphore was queried successfully. Otherwise, one of the following errors may
/// be returned:
/// + ErrorInvalidValue if an unexpected conversion error occurs.
/// + ErrorInvalidObjectType if the semaphore is not a timeline semaphore.
virtual Result QuerySemaphoreValue(
uint64* pValue) = 0;
/// Waits on a timeline semaphore point. To be clear, this is a CPU-side wait.
///
/// @param [in] value The timeline point to wait on.
/// @param [in] timeout The maximum time to wait, expressed in nanoseconds.
///
/// @returns Success if the timeline semaphore point was waited on successfully. Otherwise, one of the following
/// errors may be returned:
/// + ErrorInvalidValue if an unexpected conversion error occurs.
/// + ErrorInvalidObjectType if the semaphore is not a timeline semaphore.
virtual Result WaitSemaphoreValue(
uint64 value,
std::chrono::nanoseconds timeout) = 0;
/// Signals a timeline semaphore point. To be clear, this is a CPU-side signal.
///
/// @param [in] value The timeline point to signal.
///
/// @returns Success if the timeline semaphore point was signaled successfully. Otherwise, one of the following
/// errors may be returned:
/// + ErrorInvalidValue if an unexpected conversion error occurs.
/// + ErrorInvalidObjectType if the semaphore is not a timeline semaphore.
virtual Result SignalSemaphoreValue(
uint64 value) = 0;
#if PAL_KMT_BUILD || PAL_AMDGPU_BUILD
/// Returns an OS-specific handle which can be used to refer to this semaphore object across processes. This will
/// return a null or invalid handle if the object was not created with the external create flag set.
///
/// @param [in] exportInfo Information describing how the semaphore handle should be exported.
/// @note This function is only declared when PAL_KMT_BUILD or PAL_AMDGPU_BUILD is enabled.
/// NOTE(review): this note previously read "only available for Linux builds", which does not match the
/// preprocessor guard above - confirm the intended platforms.
///
/// @returns An OS-specific handle which can be used to access the semaphore object across processes.
virtual OsExternalHandle ExportExternalHandle(
const QueueSemaphoreExportInfo& exportInfo) const = 0;
#endif
#if defined(_WIN32)
/// Returns an OS-specific handle which can be used by another device to access the semaphore object.
/// Only declared when _WIN32 is defined.
///
/// @returns An OS-specific handle which can be used by another device to access the semaphore object.
virtual OsExternalHandle ExportKmtHandle() const = 0;
#endif
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data.
void* GetClientData() const
{
return m_pClientData;
}
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData A pointer to arbitrary client data.
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method. The client data pointer starts out as nullptr.
IQueueSemaphore() : m_pClientData(nullptr) {}
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IQueueSemaphore() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};
} // Pal
@@ -1,251 +1,253 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palShaderLibrary.h
* @brief Defines the Platform Abstraction Library (PAL) IShaderLibrary interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
#include "palStringView.h"
#include "palSpan.h"
namespace Pal
{
struct GpuMemSubAllocInfo;
/// Common flags controlling creation of shader libraries.
union LibraryCreateFlags
{
struct
{
uint32 clientInternal : 1; ///< Internal library not created by the application.
uint32 isGraphics : 1; ///< Set if this is a graphics library.
uint32 reserved : 30; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
};
/// Specifies properties about an indirect function belonging to a @ref IShaderLibrary object. Part of the input
/// structure to IDevice::CreateShaderLibrary().
struct ShaderLibraryFunctionInfo
{
Util::StringView<char> symbolName; ///< ELF symbol name for the associated function.
gpusize gpuVirtAddr; ///< [out] GPU virtual address of the function. This is computed by PAL during
/// library creation.
};
/// Specifies a shader sub type / ShaderKind. Reported through ShaderLibStats::shaderSubType.
enum class ShaderSubType : uint32
{
Unknown = 0,
Traversal,
RayGeneration,
Intersection,
AnyHit,
ClosestHit,
Miss,
Callable,
LaunchKernel, ///< Raytracing launch kernel.
Count ///< Number of enumerators declared before this one.
};
/// Specifies properties for creation of a compute @ref IShaderLibrary object. Input structure to
/// IDevice::CreateShaderLibrary().
/// NOTE(review): LibraryCreateFlags also carries an isGraphics bit, so "compute" above may be out of date - confirm.
struct ShaderLibraryCreateInfo
{
LibraryCreateFlags flags; ///< Library creation flags.
const void* pCodeObject; ///< Pointer to code-object ELF binary implementing the Pipeline ABI interface.
/// The code-object ELF contains pre-compiled shaders, register values, and
/// additional metadata.
size_t codeObjectSize; ///< Size of code object in bytes.
};
/// Reports properties of a compiled library. Returned by IShaderLibrary::GetInfo().
struct LibraryInfo
{
PipelineHash internalLibraryHash; ///< 128-bit identifier extracted from this library's ELF binary, composed of
/// the state the compiler decided was appropriate to identify the compiled
/// library. The lower 64 bits are "stable"; the upper 64 bits are "unique".
};
/// Reports shader stats. Multiple bits set in the shader stage mask indicates that multiple shaders have been combined
/// due to HW support. The same information will be repeated for both the constituent shaders in this case.
/// Output structure of IShaderLibrary::GetShaderFunctionStats().
struct ShaderLibStats
{
ShaderHash shaderHash; ///< Shader hash.
CommonShaderStats common; ///< The shader compilation parameters for this shader.
/// Maximum number of VGPRs the compiler was allowed to use for this shader. This limit will be the minimum
/// of any architectural restriction and any client-requested limit intended to increase the number of waves in
/// flight.
uint32 numAvailableVgprs;
/// Maximum number of SGPRs the compiler was allowed to use for this shader. This limit will be the minimum
/// of any architectural restriction and any client-requested limit intended to increase the number of waves in
/// flight.
uint32 numAvailableSgprs;
size_t isaSizeInBytes; ///< Size, in bytes, of the shader ISA disassembly for this shader.
PipelineHash palInternalLibraryHash; ///< Internal hash of the shader compilation data used by PAL.
uint32 stackFrameSizeInBytes; ///< Shader function stack frame size, in bytes.
ShaderSubType shaderSubType; ///< Shader sub type / shader kind (see @ref ShaderSubType).
CompilerStackSizes cpsStackSizes; ///< Stack sizes used in Continuation.
};
/**
***********************************************************************************************************************
* @interface IShaderLibrary
* @brief Object containing one or more shader functions stored in GPU memory. These shader functions are callable
* from the shaders contained within IPipeline objects.
*
* Before a pipeline which calls into this library is bound to a command buffer (using @ref ICmdBuffer::BindPipeline),
* the client must call @ref IPipeline::LinkWithLibraries() and specify this library in the list of linked libraries.
* Failure to comply with this requirement is an error and will result in undefined behavior.
*
* @see IDevice::CreateShaderLibrary()
* @see IPipeline::LinkWithLibraries()
***********************************************************************************************************************
*/
class IShaderLibrary : public IDestroyable
{
public:
/// Returns properties of this library and its corresponding shader functions.
///
/// @returns Property structure describing this library.
virtual const LibraryInfo& GetInfo() const = 0;
/// Returns a list of GPU memory allocations used by this library.
///
/// @param [in,out] pNumEntries Input value specifies the available size in pAllocInfoList; output value
/// reports the number of GPU memory allocations.
/// @param [out] pAllocInfoList If pAllocInfoList=nullptr, then pNumEntries is ignored on input. On output it
/// will reflect the number of allocations that make up this library. If
/// pAllocInfoList!=nullptr, then on input pNumEntries is assumed to be the number
/// of entries in the pAllocInfoList array. On output, pNumEntries reflects the
/// number of entries in pAllocInfoList that are valid.
/// @returns Success if the allocation info was successfully written to the buffer.
/// + ErrorInvalidValue if the caller provides a buffer size that is different from the size needed.
/// + ErrorInvalidPointer if pNumEntries is nullptr.
virtual Result QueryAllocationInfo(
size_t* pNumEntries,
GpuMemSubAllocInfo* const pAllocInfoList) const = 0;
/// Gives the client access to the resource ID used for internal Pal events.
/// EX: Resource Create, Resource Bind, Resource Destroy.
///
/// @returns The Resource ID.
virtual const void* GetResourceId() const = 0;
/// Obtains the binary code object for this library.
///
/// @param [in, out] pSize Represents the size of the library ELF (code object).
///
/// @param [out] pBuffer If non-null, the library ELF is written in the buffer. If null, the size required
/// for the library ELF is given out in the location pSize.
///
/// @returns Success if the library binary was fetched successfully.
/// +ErrorUnavailable if the library binary was not fetched successfully.
virtual Result GetCodeObject(
uint32* pSize,
void* pBuffer) const = 0;
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data.
void* GetClientData() const { return m_pClientData; }
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData A pointer to arbitrary client data.
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
/// Obtains the compiled shader ISA code for the shader function specified.
///
/// @param [in] shaderExportName The exported name of the shader function.
///
/// @param [in, out] pSize Represents the size of the shader ISA code.
///
/// @param [out] pBuffer If non-null, the shader ISA code is written in the buffer. If null, the size required
/// for the shader ISA is given out in the location pSize.
///
/// @returns Success if the shader ISA code was fetched successfully.
/// +ErrorUnavailable if the shader ISA code was not fetched successfully.
virtual Result GetShaderFunctionCode(
Util::StringView<char> shaderExportName,
size_t* pSize,
void* pBuffer) const = 0;
/// Obtains the shader pre- and post-compilation stats/params for the specified shader function.
///
/// @param [in] shaderExportName The exported name of the shader function.
///
/// @param [out] pShaderStats Pointer to the ShaderLibStats structure which will be filled with the stats for
/// the shader function named by shaderExportName. This cannot be nullptr.
///
/// @note NOTE(review): earlier documentation described a getDisassemblySize parameter that this signature does
/// not have; confirm how ShaderLibStats::isaSizeInBytes is populated.
///
/// @returns Success if the stats were successfully obtained for this shader, including the shader disassembly size.
/// +ErrorUnavailable if a wrong shader stage for this pipeline was specified, or if some internal error
/// occurred.
virtual Result GetShaderFunctionStats(
Util::StringView<char> shaderExportName,
ShaderLibStats* pShaderStats) const = 0;
/// Returns the function list owned by this shader library.
///
/// @returns A span of ShaderLibraryFunctionInfo entries.
virtual const Util::Span<const ShaderLibraryFunctionInfo> GetShaderLibFunctionInfos() const = 0;
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method. The client data pointer starts out as nullptr.
IShaderLibrary() : m_pClientData(nullptr) { }
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IShaderLibrary() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
// Copy construction and copy assignment are explicitly deleted.
IShaderLibrary(const IShaderLibrary&) = delete;
IShaderLibrary& operator=(const IShaderLibrary&) = delete;
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palShaderLibrary.h
* @brief Defines the Platform Abstraction Library (PAL) IShaderLibrary interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
#include "palStringView.h"
#include "palSpan.h"
namespace Pal
{
struct GpuMemSubAllocInfo;
/// Common flags controlling creation of shader libraries.
union LibraryCreateFlags
{
struct
{
uint32 clientInternal : 1; ///< Internal library not created by the application.
uint32 isGraphics : 1; ///< Set if this is a graphics library.
uint32 reserved : 30; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
};
/// Specifies properties about an indirect function belonging to a @ref IShaderLibrary object. Part of the input
/// structure to IDevice::CreateShaderLibrary().
struct ShaderLibraryFunctionInfo
{
Util::StringView<char> symbolName; ///< ELF symbol name for the associated function.
gpusize gpuVirtAddr; ///< [out] GPU virtual address of the function. This is computed by PAL during
/// library creation.
};
/// Specifies a shader sub type / ShaderKind. Reported through ShaderLibStats::shaderSubType.
enum class ShaderSubType : uint32
{
Unknown = 0,
Traversal,
RayGeneration,
Intersection,
AnyHit,
ClosestHit,
Miss,
Callable,
LaunchKernel, ///< Raytracing launch kernel.
Count ///< Number of enumerators declared before this one.
};
/// Specifies properties for creation of a compute @ref IShaderLibrary object. Input structure to
/// IDevice::CreateShaderLibrary().
/// NOTE(review): LibraryCreateFlags also carries an isGraphics bit, so "compute" above may be out of date - confirm.
struct ShaderLibraryCreateInfo
{
LibraryCreateFlags flags; ///< Library creation flags.
const void* pCodeObject; ///< Pointer to code-object ELF binary implementing the Pipeline ABI interface.
/// The code-object ELF contains pre-compiled shaders, register values, and
/// additional metadata.
size_t codeObjectSize; ///< Size of code object in bytes.
};
/// Reports properties of a compiled library. Returned by IShaderLibrary::GetInfo().
struct LibraryInfo
{
PipelineHash internalLibraryHash; ///< 128-bit identifier extracted from this library's ELF binary, composed of
/// the state the compiler decided was appropriate to identify the compiled
/// library. The lower 64 bits are "stable"; the upper 64 bits are "unique".
Util::StringView<char> colorExports; ///< For a Graphics Partial Pipeline pixel shader, an opaque
/// string to pass to the compiler to build the color export shader.
/// NOTE(review): this is a non-owning view; the lifetime of the
/// underlying characters is not visible here - confirm.
};
/// Reports shader stats. Multiple bits set in the shader stage mask indicates that multiple shaders have been combined
/// due to HW support. The same information will be repeated for both the constituent shaders in this case.
/// Output structure of IShaderLibrary::GetShaderFunctionStats().
struct ShaderLibStats
{
ShaderHash shaderHash; ///< Shader hash.
CommonShaderStats common; ///< The shader compilation parameters for this shader.
/// Maximum number of VGPRs the compiler was allowed to use for this shader. This limit will be the minimum
/// of any architectural restriction and any client-requested limit intended to increase the number of waves in
/// flight.
uint32 numAvailableVgprs;
/// Maximum number of SGPRs the compiler was allowed to use for this shader. This limit will be the minimum
/// of any architectural restriction and any client-requested limit intended to increase the number of waves in
/// flight.
uint32 numAvailableSgprs;
size_t isaSizeInBytes; ///< Size, in bytes, of the shader ISA disassembly for this shader.
PipelineHash palInternalLibraryHash; ///< Internal hash of the shader compilation data used by PAL.
uint32 stackFrameSizeInBytes; ///< Shader function stack frame size, in bytes.
ShaderSubType shaderSubType; ///< Shader sub type / shader kind (see @ref ShaderSubType).
CompilerStackSizes cpsStackSizes; ///< Stack sizes used in Continuation.
};
/**
***********************************************************************************************************************
* @interface IShaderLibrary
* @brief Object containing one or more shader functions stored in GPU memory. These shader functions are callable
* from the shaders contained within IPipeline objects.
*
* Before a pipeline which calls into this library is bound to a command buffer (using @ref ICmdBuffer::BindPipeline),
* the client must call @ref IPipeline::LinkWithLibraries() and specify this library in the list of linked libraries.
* Failure to comply with this requirement is an error and will result in undefined behavior.
*
* @see IDevice::CreateShaderLibrary()
* @see IPipeline::LinkWithLibraries()
***********************************************************************************************************************
*/
class IShaderLibrary : public IDestroyable
{
public:
    /// Queries the properties of this library and of the shader functions it contains.
    ///
    /// @returns Reference to the property structure describing this library.
    virtual const LibraryInfo& GetInfo() const = 0;

    /// Enumerates the GPU memory allocations used by this library.
    ///
    /// @param [in,out] pNumEntries    If pAllocInfoList is nullptr, the input value is ignored and the output value
    ///                                is the number of allocations that make up this library.  Otherwise, the input
    ///                                value is taken to be the number of entries in the pAllocInfoList array and the
    ///                                output value is the number of entries in pAllocInfoList that are valid.
    /// @param [out]    pAllocInfoList Array which receives the allocation info.  May be nullptr, in which case only
    ///                                the number of allocations is reported through pNumEntries.
    ///
    /// @returns Success if the allocation info was successfully written to the buffer.  Otherwise, one of the
    ///          following errors may be returned:
    ///          + ErrorInvalidValue if the caller provides a buffer size that is different from the size needed.
    ///          + ErrorInvalidPointer if pNumEntries is nullptr.
    virtual Result QueryAllocationInfo(
        size_t*                   pNumEntries,
        GpuMemSubAllocInfo* const pAllocInfoList) const = 0;

    /// Exposes the resource ID which PAL uses for this object in its internal events
    /// (e.g., Resource Create, Resource Bind, Resource Destroy).
    ///
    /// @returns The Resource ID.
    virtual const void* GetResourceId() const = 0;

    /// Retrieves the binary code object (ELF) for this library.
    ///
    /// @param [in,out] pSize   Size of the library ELF.  When pBuffer is nullptr, the size required to hold the
    ///                         library ELF is written to this location.
    /// @param [out]    pBuffer If non-null, the library ELF is written into this buffer.  If null, only the required
    ///                         size is reported through pSize.
    ///
    /// @returns Success if the library binary was fetched successfully.  Otherwise, one of the following errors may
    ///          be returned:
    ///          + ErrorUnavailable if the library binary was not fetched successfully.
    virtual Result GetCodeObject(
        uint32* pSize,
        void*   pBuffer) const = 0;

    /// Retrieves the arbitrary client data pointer associated with this object.
    /// Clients can use this to attach their own data to a particular PAL object.
    ///
    /// @returns Pointer to client data.
    void* GetClientData() const
    {
        return m_pClientData;
    }

    /// Replaces the arbitrary client data pointer associated with this object.
    /// Clients can use this to attach their own data to a particular PAL object.
    ///
    /// @param [in] pClientData A pointer to arbitrary client data.
    void SetClientData(void* pClientData)
    {
        m_pClientData = pClientData;
    }

    /// Retrieves the compiled shader ISA code for the specified shader function.
    ///
    /// @param [in]     shaderExportName The exported name of the shader function.
    /// @param [in,out] pSize            Size of the shader ISA code.  When pBuffer is nullptr, the size required to
    ///                                  hold the shader ISA is written to this location.
    /// @param [out]    pBuffer          If non-null, the shader ISA code is written into this buffer.  If null, only
    ///                                  the required size is reported through pSize.
    ///
    /// @returns Success if the shader ISA code was fetched successfully.  Otherwise, one of the following errors may
    ///          be returned:
    ///          + ErrorUnavailable if the shader ISA code was not fetched successfully.
    virtual Result GetShaderFunctionCode(
        Util::StringView<char> shaderExportName,
        size_t*                pSize,
        void*                  pBuffer) const = 0;

    /// Retrieves the pre- and post-compilation stats/params for the specified shader function.
    ///
    /// @param [in]  shaderExportName The exported name of the shader function.
    /// @param [out] pShaderStats     Pointer to the ShaderLibStats structure which will be filled with the stats for
    ///                               the specified shader.  This cannot be nullptr.
    ///
    /// @returns Success if the stats were successfully obtained for this shader, including the shader disassembly
    ///          size.  Otherwise, one of the following errors may be returned:
    ///          + ErrorUnavailable if the stats could not be obtained for the specified shader, or if some internal
    ///            error occurred.
    virtual Result GetShaderFunctionStats(
        Util::StringView<char> shaderExportName,
        ShaderLibStats*        pShaderStats) const = 0;

    /// Retrieves the list of functions owned by this shader library.
    ///
    /// @returns A list of ShaderLibraryFunctionInfo.
    virtual const Util::Span<const ShaderLibraryFunctionInfo> GetShaderLibFunctionInfos() const = 0;

protected:
    /// @internal Constructor.  Protected so that clients cannot use the new operator on this interface; objects must
    /// be created by explicitly calling the proper create method.
    IShaderLibrary()
        :
        m_pClientData(nullptr)
    {
    }

    /// @internal Destructor.  Protected so that clients cannot use the delete operator on this interface; objects
    /// must be destroyed by explicitly calling IDestroyable::Destroy(), and the client is responsible for freeing the
    /// system memory allocated for the object.
    virtual ~IShaderLibrary()
    {
    }

private:
    // Copying an interface object is not permitted.
    IShaderLibrary(const IShaderLibrary&) = delete;
    IShaderLibrary& operator=(const IShaderLibrary&) = delete;

    /// @internal Client data pointer.  Holds an arbitrary value which is read with GetClientData() and written with
    /// SetClientData().
    /// For non-top-layer objects, this will point to the layer above the current object.
    void* m_pClientData;
};
} // Pal