Update amdgpu-windows-interop with latest changes 20251105 (#1728)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
systems-assistant[bot]
2025-11-05 22:09:25 -05:00
committed by GitHub
orang tua ea31a0bf18
melakukan 27f85500f8
136 mengubah file dengan 44352 tambahan dan 44136 penghapusan
File diff ditekan karena terlalu besar Load Diff
@@ -1,204 +1,204 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palCmdAllocator.h
* @brief Defines the Platform Abstraction Library (PAL) ICmdAllocator interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
// Forward declarations.
struct GpuMemSubAllocInfo;
class IGpuMemory;
/// Flags controlling the creation of ICmdAllocator objects. The four single-bit flags together with the 28 reserved
/// bits occupy 32 bits, which @ref u32All exposes as a single packed value.
union CmdAllocatorCreateFlags
{
struct
{
uint32 threadSafe : 1; ///< If set, the allocator will acquire a lock each time it is accessed;
/// otherwise it will not attempt to protect itself from multithreaded
/// access.
uint32 autoMemoryReuse : 1; ///< If set, the allocator will track when the GPU finishes accessing
/// each piece of command memory and attempt to reuse memory which the
/// GPU is done with before allocating more memory from the OS. If not
/// set, memory will only be recycled after a call to
/// @ref ICmdAllocator::Reset().
uint32 disableBusyChunkTracking : 1; ///< If set, the allocator will not do any GPU-side tracking of which
/// command chunks are still in use. It will be the client's (or the
/// application's) responsibility to guarantee that command chunks are
/// not returned to the allocator before the GPU has finished processing
/// them. Failure to guarantee this will result in undefined behavior.
/// This flag has no effect if @ref autoMemoryReuse is not set.
uint32 autoTrimMemory : 1; ///< If set, the allocator will automatically trim down the allocations
/// (where all chunks are idle on the freeList). A minimum of
/// allocFreeThreshold allocations is kept for fast reuse.
uint32 reserved : 28; ///< Reserved for future use.
};
uint32 u32All; ///< All flags packed as a 32-bit unsigned integer.
};
/// The different types of allocation data that an ICmdAllocator allocates and distributes to command buffers.
enum CmdAllocType : uint32
{
CommandDataAlloc = 0, ///< Data allocated is for executable commands.
EmbeddedDataAlloc, ///< Data allocated is for embedded data.
LargeEmbeddedDataAlloc, ///< Data allocated is for embedded data, where the allocation is >32kb.
GpuScratchMemAlloc, ///< Data allocated is GPU-only accessible at command buffer execution-time. Possible
/// uses include GPU events.
CmdAllocatorTypeCount ///< Number of allocation types for ICmdAllocators (a count, not an allocation type).
};
/// Specifies properties for creation of an ICmdAllocator object. Input structure to IDevice::CreateCmdAllocator().
/// The allocInfo array holds one entry per allocation type (CmdAllocatorTypeCount entries in total).
struct CmdAllocatorCreateInfo
{
CmdAllocatorCreateFlags flags; ///< Flags controlling command allocator creation.
struct
{
GpuHeap allocHeap; ///< Preferred allocation heap. For @ref GpuScratchMemAlloc, this field is
/// ignored and the allocation will always be in GPU-invisible memory. For
/// all other allocation types, this must be CPU-mappable.
/// For best performance, command allocators that will be used by the
/// UVD engine should prefer the Local heap.
gpusize allocSize; ///< Size, in bytes, of the GPU memory allocations this allocator will create.
/// It must be an integer multiple of suballocSize.
gpusize suballocSize; ///< Size, in bytes, of the chunks of GPU memory this allocator will give to
/// command buffers. It must be an integer multiple of 4096.
/// Must be greater than zero even if the client doesn't plan on using this
/// allocation type.
uint32 allocFreeThreshold; ///< Minimum count of free allocations that the allocator should keep around
/// for fast reuse. It is used when the autoTrimMemory flag is set.
} allocInfo[CmdAllocatorTypeCount]; ///< Information for each allocation type.
};
/// Output structure for ICmdAllocator::QueryUtilizationInfo().
/// PAL clients can query the CmdAllocator utilization data in order to decide whether to trim the allocations.
struct CmdAllocatorUtilizationInfo
{
uint32 numAllocations; ///< Number of allocations owned by the allocator.
uint32 numFreeChunks; ///< Number of chunks that are reset and not in use.
uint32 numBusyChunks; ///< Number of chunks that are in use by the GPU.
uint32 numReuseChunks; ///< Number of chunks that have been 'returned' to the allocator for reuse.
};
/**
 ***********************************************************************************************************************
 * @interface ICmdAllocator
 * @brief     Allocates and distributes GPU memory to command buffers on the client's behalf.
 *
 * Every ICmdBuffer object must be associated with an ICmdAllocator at creation.  A command buffer may switch to a
 * different command allocator when ICmdBuffer::Reset() is called.  While they are building commands, the command
 * buffers associated with a given command allocator query that allocator for additional GPU memory.
 *
 * To protect against race conditions, the client must ask for a thread-safe command allocator unless it can guarantee
 * that all command buffers associated with a given command allocator will be built, reset, and destroyed in a
 * thread-safe manner.  It is illegal to destroy a command allocator while it still has command buffers associated
 * with it.
 *
 * @see IDevice::CreateCmdAllocator()
 ***********************************************************************************************************************
 */
class ICmdAllocator : public IDestroyable
{
public:
    /// Explicitly resets a command allocator, marking all internal GPU memory allocations as unused.
    ///
    /// Before calling this function, the client must guarantee that every command buffer associated with this
    /// allocator has finished GPU execution and has been explicitly reset.
    ///
    /// @param [in] freeMemory  Whether all GPU and CPU memory allocations should be returned to the OS.
    ///
    /// @returns Success if the command allocator was successfully reset.  Otherwise, one of the following errors may
    ///          be returned:
    ///          + ErrorUnknown if an internal PAL error occurs.
    virtual Result Reset(bool freeMemory) = 0;

    /// Explicitly trims a command allocator, deleting as many unused internal GPU memory allocations as possible.
    ///
    /// @param [in] allocTypeMask     Controls, per CmdAllocType, whether trimming is applied.
    ///                               Use (1 << CmdAllocatorTypeCount) - 1 to apply trimming to all types.
    ///                               To trim only the embedded data, use (1 << EmbeddedDataAlloc).
    /// @param [in] dynamicThreshold  Minimum count of free allocations that the allocator should keep around.
    ///
    /// @returns Success if the command allocator was successfully trimmed.
    virtual Result Trim(uint32 allocTypeMask, uint32 dynamicThreshold) = 0;

    /// Queries the number of allocations and chunks of the given CmdAllocator type.
    /// This may help clients decide whether or not to apply trimming.
    ///
    /// @param [in]  type              CmdAllocType that is being queried.
    /// @param [out] pUtilizationInfo  The allocation and chunk counts will be stored here.
    ///
    /// @returns Success if valid values can be reported.
    virtual Result QueryUtilizationInfo(CmdAllocType type, CmdAllocatorUtilizationInfo* pUtilizationInfo) const = 0;

    /// Returns the value of the associated arbitrary client data pointer.
    /// Can be used to associate arbitrary data with a particular PAL object.
    ///
    /// @returns Pointer to client data.
    void* GetClientData() const { return m_pClientData; }

    /// Sets the value of the associated arbitrary client data pointer.
    /// Can be used to associate arbitrary data with a particular PAL object.
    ///
    /// @param [in] pClientData  A pointer to arbitrary client data.
    void SetClientData(void* pClientData) { m_pClientData = pClientData; }

protected:
    /// @internal Constructor.  Prevents use of the new operator on this interface.  The client must create objects by
    /// explicitly calling the proper create method.  The client data pointer starts out null.
    ICmdAllocator()
        :
        m_pClientData(nullptr)
    {
    }

    /// @internal Destructor.  Prevents use of the delete operator on this interface.  The client must destroy objects
    /// by explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for
    /// the object on their own.
    virtual ~ICmdAllocator() { }

private:
    /// @internal Client data pointer.  This can have an arbitrary value; it is returned by GetClientData() and set
    /// via SetClientData().
    /// For non-top-layer objects, this will point to the layer above the current object.
    void* m_pClientData;
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palCmdAllocator.h
* @brief Defines the Platform Abstraction Library (PAL) ICmdAllocator interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
// Forward declarations.
struct GpuMemSubAllocInfo;
class IGpuMemory;
/// Flags controlling the creation of ICmdAllocator objects. The four single-bit flags together with the 28 reserved
/// bits occupy 32 bits, which @ref u32All exposes as a single packed value.
union CmdAllocatorCreateFlags
{
struct
{
uint32 threadSafe : 1; ///< If set, the allocator will acquire a lock each time it is accessed;
/// otherwise it will not attempt to protect itself from multithreaded
/// access.
uint32 autoMemoryReuse : 1; ///< If set, the allocator will track when the GPU finishes accessing
/// each piece of command memory and attempt to reuse memory which the
/// GPU is done with before allocating more memory from the OS. If not
/// set, memory will only be recycled after a call to
/// @ref ICmdAllocator::Reset().
uint32 disableBusyChunkTracking : 1; ///< If set, the allocator will not do any GPU-side tracking of which
/// command chunks are still in use. It will be the client's (or the
/// application's) responsibility to guarantee that command chunks are
/// not returned to the allocator before the GPU has finished processing
/// them. Failure to guarantee this will result in undefined behavior.
/// This flag has no effect if @ref autoMemoryReuse is not set.
uint32 autoTrimMemory : 1; ///< If set, the allocator will automatically trim down the allocations
/// (where all chunks are idle on the freeList). A minimum of
/// allocFreeThreshold allocations is kept for fast reuse.
uint32 reserved : 28; ///< Reserved for future use.
};
uint32 u32All; ///< All flags packed as a 32-bit unsigned integer.
};
/// The different types of allocation data that an ICmdAllocator allocates and distributes to command buffers.
enum CmdAllocType : uint32
{
CommandDataAlloc = 0, ///< Data allocated is for executable commands.
EmbeddedDataAlloc, ///< Data allocated is for embedded data.
LargeEmbeddedDataAlloc, ///< Data allocated is for embedded data, where the allocation is >32kb.
GpuScratchMemAlloc, ///< Data allocated is GPU-only accessible at command buffer execution-time. Possible
/// uses include GPU events.
CmdAllocatorTypeCount ///< Number of allocation types for ICmdAllocators (a count, not an allocation type).
};
/// Specifies properties for creation of an ICmdAllocator object. Input structure to IDevice::CreateCmdAllocator().
/// The allocInfo array holds one entry per allocation type (CmdAllocatorTypeCount entries in total).
struct CmdAllocatorCreateInfo
{
CmdAllocatorCreateFlags flags; ///< Flags controlling command allocator creation.
struct
{
GpuHeap allocHeap; ///< Preferred allocation heap. For @ref GpuScratchMemAlloc, this field is
/// ignored and the allocation will always be in GPU-invisible memory. For
/// all other allocation types, this must be CPU-mappable.
/// For best performance, command allocators that will be used by the
/// UVD engine should prefer the Local heap.
gpusize allocSize; ///< Size, in bytes, of the GPU memory allocations this allocator will create.
/// It must be an integer multiple of suballocSize.
gpusize suballocSize; ///< Size, in bytes, of the chunks of GPU memory this allocator will give to
/// command buffers. It must be an integer multiple of 4096.
/// Must be greater than zero even if the client doesn't plan on using this
/// allocation type.
uint32 allocFreeThreshold; ///< Minimum count of free allocations that the allocator should keep around
/// for fast reuse. It is used when the autoTrimMemory flag is set.
} allocInfo[CmdAllocatorTypeCount]; ///< Information for each allocation type.
};
/// Output structure for ICmdAllocator::QueryUtilizationInfo().
/// PAL clients can query the CmdAllocator utilization data in order to decide whether to trim the allocations.
struct CmdAllocatorUtilizationInfo
{
uint32 numAllocations; ///< Number of allocations owned by the allocator.
uint32 numFreeChunks; ///< Number of chunks that are reset and not in use.
uint32 numBusyChunks; ///< Number of chunks that are in use by the GPU.
uint32 numReuseChunks; ///< Number of chunks that have been 'returned' to the allocator for reuse.
};
/**
 ***********************************************************************************************************************
 * @interface ICmdAllocator
 * @brief     Allocates and distributes GPU memory to command buffers on the client's behalf.
 *
 * Every ICmdBuffer object must be associated with an ICmdAllocator at creation.  A command buffer may switch to a
 * different command allocator when ICmdBuffer::Reset() is called.  While they are building commands, the command
 * buffers associated with a given command allocator query that allocator for additional GPU memory.
 *
 * To protect against race conditions, the client must ask for a thread-safe command allocator unless it can guarantee
 * that all command buffers associated with a given command allocator will be built, reset, and destroyed in a
 * thread-safe manner.  It is illegal to destroy a command allocator while it still has command buffers associated
 * with it.
 *
 * @see IDevice::CreateCmdAllocator()
 ***********************************************************************************************************************
 */
class ICmdAllocator : public IDestroyable
{
public:
    /// Explicitly resets a command allocator, marking all internal GPU memory allocations as unused.
    ///
    /// Before calling this function, the client must guarantee that every command buffer associated with this
    /// allocator has finished GPU execution and has been explicitly reset.
    ///
    /// @param [in] freeMemory  Whether all GPU and CPU memory allocations should be returned to the OS.
    ///
    /// @returns Success if the command allocator was successfully reset.  Otherwise, one of the following errors may
    ///          be returned:
    ///          + ErrorUnknown if an internal PAL error occurs.
    virtual Result Reset(bool freeMemory) = 0;

    /// Explicitly trims a command allocator, deleting as many unused internal GPU memory allocations as possible.
    ///
    /// @param [in] allocTypeMask     Controls, per CmdAllocType, whether trimming is applied.
    ///                               Use (1 << CmdAllocatorTypeCount) - 1 to apply trimming to all types.
    ///                               To trim only the embedded data, use (1 << EmbeddedDataAlloc).
    /// @param [in] dynamicThreshold  Minimum count of free allocations that the allocator should keep around.
    ///
    /// @returns Success if the command allocator was successfully trimmed.
    virtual Result Trim(uint32 allocTypeMask, uint32 dynamicThreshold) = 0;

    /// Queries the number of allocations and chunks of the given CmdAllocator type.
    /// This may help clients decide whether or not to apply trimming.
    ///
    /// @param [in]  type              CmdAllocType that is being queried.
    /// @param [out] pUtilizationInfo  The allocation and chunk counts will be stored here.
    ///
    /// @returns Success if valid values can be reported.
    virtual Result QueryUtilizationInfo(CmdAllocType type, CmdAllocatorUtilizationInfo* pUtilizationInfo) const = 0;

    /// Returns the value of the associated arbitrary client data pointer.
    /// Can be used to associate arbitrary data with a particular PAL object.
    ///
    /// @returns Pointer to client data.
    void* GetClientData() const { return m_pClientData; }

    /// Sets the value of the associated arbitrary client data pointer.
    /// Can be used to associate arbitrary data with a particular PAL object.
    ///
    /// @param [in] pClientData  A pointer to arbitrary client data.
    void SetClientData(void* pClientData) { m_pClientData = pClientData; }

protected:
    /// @internal Constructor.  Prevents use of the new operator on this interface.  The client must create objects by
    /// explicitly calling the proper create method.  The client data pointer starts out null.
    ICmdAllocator()
        :
        m_pClientData(nullptr)
    {
    }

    /// @internal Destructor.  Prevents use of the delete operator on this interface.  The client must destroy objects
    /// by explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for
    /// the object on their own.
    virtual ~ICmdAllocator() { }

private:
    /// @internal Client data pointer.  This can have an arbitrary value; it is returned by GetClientData() and set
    /// via SetClientData().
    /// For non-top-layer objects, this will point to the layer above the current object.
    void* m_pClientData;
};
} // Pal
File diff ditekan karena terlalu besar Load Diff
@@ -1,370 +1,370 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palCmdTracking.h
* @brief Defines a number of support classes used for construction and storage of struct TrackedCmdLocation
* defined in trackedCmdLocation.h
*
* - struct TrackingEventInfo: A single mapping from a uint8 event id to a name, used for logging
* - class TrackedCmdSupportBase A set of TrackingEventInfo, maintained outside of Pal
* - class TrackedCmdLocationArray The arrays for TrackedCmdLocation's used for reporting
* correlation data through ICmdBufferReporting::CorrelationReportOnSubmit
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palVector.h"
#include "trackedCmdLocation.h"
namespace Pal
{
// forward decl
class Platform;
namespace CmdDisassembly
{
// forward definition
class TrackedCmdLocationArray;
/**
************************************************************************************************************************
* @brief class TrackedCmdLocationRef
* A copyable reference to a member in a TrackedCmdLocationArray, invariant to that array be
* re-allocated.
*
* @detail Is simply a pointer to a TrackedCmdLocationArray, and an index in to that array
*
************************************************************************************************************************
*/
class TrackedCmdLocationRef
{
public:
TrackedCmdLocationRef()
: m_pSourceArray(nullptr),
m_index(0)
{
}
TrackedCmdLocationRef(
TrackedCmdLocationArray* pSourceArray,
Util::uint32 index)
: m_pSourceArray(pSourceArray),
m_index(index)
{
}
TrackedCmdLocationRef(
TrackedCmdLocationRef&& other) = default;
TrackedCmdLocationRef(
TrackedCmdLocationRef const& other) = default;
TrackedCmdLocationRef& operator=(
TrackedCmdLocationRef&& other) = default;
TrackedCmdLocationRef& operator=(
TrackedCmdLocationRef const& other) = default;
bool operator==(
TrackedCmdLocationRef const& other) const
{ return (this->m_pSourceArray == other.m_pSourceArray) && (this->m_index == other.m_index); }
bool operator!=(
TrackedCmdLocationRef const& other) const
{ return (this->m_pSourceArray != other.m_pSourceArray) || (this->m_index != other.m_index); }
TrackedCmdLocation* Use();
const TrackedCmdLocation* Get() const;
Util::uint32 GetIndex() const
{
return m_index;
}
/// Helper functions
///
/// Clears the TrackedCmdLocation referred to by this TrackedCmdLocationRef
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result Clear();
/// @returns
/// TrackedCmdLocationMode::Invalid if (IsValid() == false)
/// Get()->m_mode otherwise
TrackedCmdLocationMode GetMode() const;
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::Before
///
/// @param [in] eventId Refers to an uint8 event that has a begin and/or an end associated with it
/// Most likely, a value registered to a TrackedCmdSupportBase
/// @param [in] beforePtr The end pointer for the cmdList being tracked before the event referred to by eventId
/// Only 48-bits of beforePtr are used
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsBefore(
uint8 eventId,
uint64 beforePtr);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::After
///
/// @param [in] eventId Refers to an uint8 event that has a begin and/or an end associated with it
/// Most likely, a value registered to a TrackedCmdSupportBase
/// @param [in] afterPtr The end pointer for the cmdList being tracked after the event referred to by eventId
/// Only 48-bits of afterPtr are used
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsAfter(
uint8 eventId,
uint64 afterPtr);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::Delta, with no begin or end (ie, no data can be written to
/// the cmdList being tracked "during" the event referred to be eventId
///
/// @param [in] eventId Refers to an uint8 event that does not have a begin and/or an end associated with it
/// Such as Pal::CmdDisassembly::TrackedCmdLocation::PostClientEvent
/// @param [in] ptr The end pointer for the cmdList being tracked after the event referred to by eventId
/// Only 48-bits of ptr are used
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsEmptyDelta(
uint8 eventId,
uint64 ptr);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::ClientId
///
/// @param [in] clientId A 61-bit bit value used by the client application to identify which cmdList is being
/// tracked
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsClientId(
uint64 clientId);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::ClientEventId
///
/// @param [in] clientEventId A 61-bit bit value used by the client application to identify
/// a client event relative to the current end position of the cmdList being tracked
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsClientEvent(
uint64 clientEventId);
/// @brief bool TrackedCmdLocation::TrySetAsDelta(uint64 afterPtr)
/// Will attempt to set this TrackedCmdLocation to type TrackedCmdLocationMode::Delta
///
/// @detail If GetMode() == TrackedCmdLocationMode::Before and afterPtr - m_correlateInternal.m_ptr is small
/// enough to be encoded in m_correlateInternal.m_deltaInDWords, the mode will be altered to
/// TrackedCmdLocationMode::Delta, with afterPtr - m_correlateInternal.m_ptr encoded in
/// m_correlateInternal.m_deltaInDWords.
/// If this attempt fails, the calling function should instead create a TrackedCmdLocationMode::After
/// TrackedCmdLocation
///
/// @param [in] afterPtr, the value a TrackedCmdLocationMode::After would have for m_correlateInternal.m_ptr
/// @return Result::Success if it was possible to set this TrackedCmdLocation to type
/// TrackedCmdLocationMode::Delta
/// Result::Unsupported if the conditions described above are not met.
Result TrySetAsDelta(
uint64 afterPtr);
private:
TrackedCmdLocationArray* m_pSourceArray;
Util::uint32 m_index;
Result SetMode(
TrackedCmdLocationMode mode);
};
/// @brief struct TrackingEventInfo
///        A name, paired with a boolean recording whether that name is valid / has been set.
struct TrackingEventInfo
{
    Util::StringView<char> name;    ///< Name of the event; only meaningful while isValid is true.
    bool                   isValid; ///< False until a name has been assigned.

    /// Starts out flagged as invalid; the name is left default-constructed.
    TrackingEventInfo()
        :
        isValid{false}
    {
    }
};
/**
************************************************************************************************************************
* @brief class TrackedCmdSupportBase translates eventId's to strings for internal correlation events
*
* @detail For use in Pal::Queue when dumping to text files. Corresponds to
* TrackedCmdLocation::m_correlateInternal.m_event for the cases where TrackedCmdLocation::m_mode
* is not TrackedCmdLocationMode::ClientEvent
*
* The implementation for this is in whatever client of Pal that is creating the internal correlation events,
*
************************************************************************************************************************
*/
class TrackedCmdSupportBase
{
public:
virtual ~TrackedCmdSupportBase() = default;
void SetEventIdName(
uint8 eventId,
const char* name)
{
PAL_ASSERT(static_cast<uint32>(eventId) < NumUInt8Values);
m_allEventsMap[eventId].name = name;
m_allEventsMap[eventId].isValid = true;
}
TrackingEventInfo const& GetEventInfo(
uint8 eventId) const
{
PAL_ASSERT(static_cast<uint32>(eventId) < NumUInt8Values);
return m_allEventsMap[eventId];
}
protected:
static constexpr uint32 NumUInt8Values = UINT8_MAX + 1;
TrackingEventInfo m_allEventsMap[NumUInt8Values];
TrackedCmdSupportBase() = default;
};
/**
 ************************************************************************************************************************
 * @brief class TrackedCmdLocationArray is simply a TrackedCmdLocationVec together with a clientId
 *        and some helpers.  TrackedCmdLocationArrays live on Pal::GfxCmdBuffer.
 *
 * @details Each Pal::GfxCmdBuffer has at most CmdDisassembly::MaxNumSubCmdBuffers TrackedCmdLocationArrays,
 *          corresponding to Pal::GfxCmdBuffer::NumCmdStreams().
 *
 *          The clientId used for TrackedCmdLocationArray::m_clientId corresponds to the client id used in
 *          TrackedCmdLocation::m_clientId.m_clientId.
 *
 *          For the moment, the underlying implementation used is
 *          Util::Vector<TrackedCmdLocation, DefaultCapacity, Pal::Platform>, but it could be changed to use a chunk
 *          scheme, especially as sizes of cmdLists can become very large.
 *          The only requirement on such a change is that TrackedCmdLocationRef continues to function as an accessor.
 *
 *          Note that the functions in TrackedCmdLocationArray are not designed for thread-safety, as they are
 *          issued from command-list-building functions that are, in their turn, not thread safe.  Adding mutex
 *          behavior here would potentially hide issues relating to thread-safety.
 *
 ************************************************************************************************************************
 */
class TrackedCmdLocationArray
{
public:
    static constexpr uint32 DefaultCapacity = 1024;       ///< Capacity template argument of TrackedCmdLocationVec.
    static constexpr uint32 BadIndex        = UINT32_MAX; ///< Index that Reset() stores in the last-location ref.
    static constexpr uint64 InvalidClientId = UINT64_MAX; ///< Client id that Reset() stores.

    using TrackedCmdLocationVec = Util::Vector<TrackedCmdLocation, DefaultCapacity, Pal::Platform>;

    /// @returns sizeof(TrackedCmdLocationArray).
    static uint32 GetTrackedCmdLocationArraySizeInBytes()
    {
        return static_cast<uint32>(sizeof(TrackedCmdLocationArray));
    }

    /// Factory function, defined out of line.  NOTE(review): presumably constructs the array inside pMemory, which
    /// would need to be at least GetTrackedCmdLocationArraySizeInBytes() bytes - confirm against the implementation.
    static TrackedCmdLocationArray* CreateTrackedCmdLocationArray(
        void*          pMemory,
        Pal::Platform* pPlatform);

    /// Returns the array to its empty state: the last-location reference points at BadIndex, the client id becomes
    /// InvalidClientId, and the location vector is cleared.
    void Reset()
    {
        m_lastLocation = TrackedCmdLocationRef(this, BadIndex);
        m_clientId     = InvalidClientId;
        m_locations.Clear();
    }

    /// Defined out of line.  The destructor is private, so this is the visible way to dispose of an instance.
    void Destroy();

    /// @returns The client id currently stored in this array.
    uint64 GetClientId() const { return m_clientId; }

    /// Stores a client id for this array.  Defined out of line.
    Result SetClientId(uint64 clientId);

    /// @returns The size reported by the underlying location vector.
    Util::uint32 GetTotalSize() const { return m_locations.size(); }

    /// @returns Read-only access to the underlying location vector.
    const TrackedCmdLocationVec& GetLocationsVec() const { return m_locations; }

    /// @returns Mutable access to the underlying location vector.
    TrackedCmdLocationVec& UseLocationsVec() { return m_locations; }

    /// Defined out of line.  NOTE(review): presumably appends a new location and reports a reference to it through
    /// pResult - confirm against the implementation.
    Pal::Result MakeNext(TrackedCmdLocationRef* pResult);

    /// @returns A copy of the reference this array records as its last location.
    const TrackedCmdLocationRef GetLast() const { return m_lastLocation; }

    /// @returns true if @p location equals the reference this array records as its last location.
    bool IsLast(const TrackedCmdLocationRef& location) const
    {
        return (location == m_lastLocation);
    }

private:
    TrackedCmdLocationVec m_locations;    ///< Storage for the tracked locations.
    Pal::Platform*        m_pPlatform;    ///< Platform pointer; its initialization is not visible in this header.
    uint64                m_clientId;     ///< Client id; InvalidClientId after Reset().
    TrackedCmdLocationRef m_lastLocation; ///< Reference recorded as the last location; BadIndex after Reset().

    // Construction and destruction are private; see CreateTrackedCmdLocationArray() and Destroy().
    TrackedCmdLocationArray(Pal::Platform* pPlatform);
    ~TrackedCmdLocationArray() = default;
};
} // namespace CmdDisassembly
} // namespace Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palCmdTracking.h
* @brief Defines a number of support classes used for construction and storage of struct TrackedCmdLocation
* defined in trackedCmdLocation.h
*
* - struct TrackingEventInfo: A single mapping from a uint8 event id to a name, used for logging
* - class TrackedCmdSupportBase A set of TrackingEventInfo, maintained outside of Pal
* - class TrackedCmdLocationArray The arrays for TrackedCmdLocation's used for reporting
* correlation data through ICmdBufferReporting::CorrelationReportOnSubmit
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palVector.h"
#include "trackedCmdLocation.h"
namespace Pal
{
// forward decl
class Platform;
namespace CmdDisassembly
{
// forward definition
class TrackedCmdLocationArray;
/**
************************************************************************************************************************
* @brief class TrackedCmdLocationRef
* A copyable reference to a member in a TrackedCmdLocationArray, invariant to that array being
* re-allocated.
*
* @detail Is simply a pointer to a TrackedCmdLocationArray, and an index in to that array
*
************************************************************************************************************************
*/
class TrackedCmdLocationRef
{
public:
TrackedCmdLocationRef()
: m_pSourceArray(nullptr),
m_index(0)
{
}
TrackedCmdLocationRef(
TrackedCmdLocationArray* pSourceArray,
Util::uint32 index)
: m_pSourceArray(pSourceArray),
m_index(index)
{
}
TrackedCmdLocationRef(
TrackedCmdLocationRef&& other) = default;
TrackedCmdLocationRef(
TrackedCmdLocationRef const& other) = default;
TrackedCmdLocationRef& operator=(
TrackedCmdLocationRef&& other) = default;
TrackedCmdLocationRef& operator=(
TrackedCmdLocationRef const& other) = default;
bool operator==(
TrackedCmdLocationRef const& other) const
{ return (this->m_pSourceArray == other.m_pSourceArray) && (this->m_index == other.m_index); }
bool operator!=(
TrackedCmdLocationRef const& other) const
{ return (this->m_pSourceArray != other.m_pSourceArray) || (this->m_index != other.m_index); }
TrackedCmdLocation* Use();
const TrackedCmdLocation* Get() const;
Util::uint32 GetIndex() const
{
return m_index;
}
/// Helper functions
///
/// Clears the TrackedCmdLocation referred to by this TrackedCmdLocationRef
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result Clear();
/// @returns
/// TrackedCmdLocationMode::Invalid if (IsValid() == false)
/// Get()->m_mode otherwise
TrackedCmdLocationMode GetMode() const;
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::Before
///
/// @param [in] eventId Refers to an uint8 event that has a begin and/or an end associated with it
/// Most likely, a value registered to a TrackedCmdSupportBase
/// @param [in] beforePtr The end pointer for the cmdList being tracked before the event referred to by eventId
/// Only 48-bits of beforePtr are used
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsBefore(
uint8 eventId,
uint64 beforePtr);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::After
///
/// @param [in] eventId Refers to an uint8 event that has a begin and/or an end associated with it
/// Most likely, a value registered to a TrackedCmdSupportBase
/// @param [in] afterPtr The end pointer for the cmdList being tracked after the event referred to by eventId
/// Only 48-bits of afterPtr are used
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsAfter(
uint8 eventId,
uint64 afterPtr);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::Delta, with no begin or end (ie, no data can be written to
/// the cmdList being tracked "during" the event referred to be eventId
///
/// @param [in] eventId Refers to an uint8 event that does not have a begin and/or an end associated with it
/// Such as Pal::CmdDisassembly::TrackedCmdLocation::PostClientEvent
/// @param [in] ptr The end pointer for the cmdList being tracked after the event referred to by eventId
/// Only 48-bits of ptr are used
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsEmptyDelta(
uint8 eventId,
uint64 ptr);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::ClientId
///
/// @param [in] clientId A 61-bit bit value used by the client application to identify which cmdList is being
/// tracked
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsClientId(
uint64 clientId);
/// Sets the TrackedCmdLocation referred to by this TrackedCmdLocationRef
/// to mode TrackedCmdLocationMode::ClientEventId
///
/// @param [in] clientEventId A 61-bit bit value used by the client application to identify
/// a client event relative to the current end position of the cmdList being tracked
///
/// @returns
/// Result::ErrorInvalidPointer if (IsValid() == false)
/// Result::Success if successful
Result SetAsClientEvent(
uint64 clientEventId);
/// @brief bool TrackedCmdLocation::TrySetAsDelta(uint64 afterPtr)
/// Will attempt to set this TrackedCmdLocation to type TrackedCmdLocationMode::Delta
///
/// @detail If GetMode() == TrackedCmdLocationMode::Before and afterPtr - m_correlateInternal.m_ptr is small
/// enough to be encoded in m_correlateInternal.m_deltaInDWords, the mode will be altered to
/// TrackedCmdLocationMode::Delta, with afterPtr - m_correlateInternal.m_ptr encoded in
/// m_correlateInternal.m_deltaInDWords.
/// If this attempt fails, the calling function should instead create a TrackedCmdLocationMode::After
/// TrackedCmdLocation
///
/// @param [in] afterPtr, the value a TrackedCmdLocationMode::After would have for m_correlateInternal.m_ptr
/// @return Result::Success if it was possible to set this TrackedCmdLocation to type
/// TrackedCmdLocationMode::Delta
/// Result::Unsupported if the conditions described above are not met.
Result TrySetAsDelta(
uint64 afterPtr);
private:
TrackedCmdLocationArray* m_pSourceArray;
Util::uint32 m_index;
Result SetMode(
TrackedCmdLocationMode mode);
};
/// @brief struct TrackingEventInfo
/// Essentially just a name, plus a boolean to indicate whether the name is valid / has been set
struct TrackingEventInfo
{
    Util::StringView<char> name;    ///< Name of the event; left default-initialized by the constructor.
    bool                   isValid; ///< false until a name has been set for this entry.

    /// Starts the entry out as "not set".
    TrackingEventInfo()
        : isValid{false}
    {
    }
};
/**
************************************************************************************************************************
* @brief class TrackedCmdSupportBase translates eventId's to strings for internal correlation events
*
* @detail For use in Pal::Queue when dumping to text files. Corresponds to
* TrackedCmdLocation::m_correlateInternal.m_event for the cases where TrackedCmdLocation::m_mode
* is not TrackedCmdLocationMode::ClientEvent
*
* The implementation for this is in whatever client of Pal that is creating the internal correlation events,
*
************************************************************************************************************************
*/
class TrackedCmdSupportBase
{
public:
virtual ~TrackedCmdSupportBase() = default;
void SetEventIdName(
uint8 eventId,
const char* name)
{
PAL_ASSERT(static_cast<uint32>(eventId) < NumUInt8Values);
m_allEventsMap[eventId].name = name;
m_allEventsMap[eventId].isValid = true;
}
TrackingEventInfo const& GetEventInfo(
uint8 eventId) const
{
PAL_ASSERT(static_cast<uint32>(eventId) < NumUInt8Values);
return m_allEventsMap[eventId];
}
protected:
static constexpr uint32 NumUInt8Values = UINT8_MAX + 1;
TrackingEventInfo m_allEventsMap[NumUInt8Values];
TrackedCmdSupportBase() = default;
};
/**
************************************************************************************************************************
* @brief class TrackedCmdLocationArray is simply a TrackedCmdLocationVec together with a clientId
* and some helpers. TrackedCmdLocationArray lives on Pal::GfxCmdBuffer
*
* @detail Each Pal::GfxCmdBuffer has at most CmdDisassembly::MaxNumSubCmdBuffers TrackedCmdLocationArray's
* corresponding to Pal::GfxCmdBuffer::NumCmdStreams();
*
* The clientId used for TrackedCmdLocationArray::m_clientId, corresponds to the client Id used in
* TrackedCmdLocation::m_clientId.m_clientId
*
* For the moment, the underlying implementation used is
* Util::Vector<TrackedCmdLocation, DefaultCapacity, Pal::Platform>, but could be changed to use a Chunk
* scheme, especially as sizes of cmdLists can become very large.
* The only requirement for such a change is that TrackedCmdLocationRef continues to function as an accessor
*
* Note that the functions in TrackedCmdLocationArray are not designed for thread-safety, as they are
* issued from command-list-building functions that are, in their turn, not thread safe. Adding mutex
* behavior here would potentially hide issues relating to thread-safety.
*
************************************************************************************************************************
*/
class TrackedCmdLocationArray
{
public:
    /// Value passed as the capacity template argument of the backing Util::Vector.
    static constexpr uint32 DefaultCapacity = 1024;
    /// Index stored in the "last location" reference by Reset().
    static constexpr uint32 BadIndex        = UINT32_MAX;
    /// Client id stored by Reset().
    static constexpr uint64 InvalidClientId = UINT64_MAX;

    /// Container type that holds the tracked locations.
    using TrackedCmdLocationVec = Util::Vector<TrackedCmdLocation, DefaultCapacity, Pal::Platform>;

    /// @returns sizeof(TrackedCmdLocationArray).
    static uint32 GetTrackedCmdLocationArraySizeInBytes()
    {
        return sizeof(TrackedCmdLocationArray);
    }

    /// Creates a TrackedCmdLocationArray.
    ///
    /// NOTE(review): the implementation is not visible here. Presumably the object is constructed inside pMemory,
    /// which would then need to hold at least GetTrackedCmdLocationArraySizeInBytes() bytes - confirm.
    ///
    /// @param [in] pMemory    Caller-provided memory.
    /// @param [in] pPlatform  Platform handed to the new object.
    static TrackedCmdLocationArray* CreateTrackedCmdLocationArray(
        void*          pMemory,
        Pal::Platform* pPlatform);

    /// Empties the location vector, sets the client id to InvalidClientId and points the "last location"
    /// reference at (this, BadIndex).
    void Reset()
    {
        m_locations.Clear();
        m_clientId     = InvalidClientId;
        m_lastLocation = TrackedCmdLocationRef(this, BadIndex);
    }

    /// Tears this object down. The destructor is private, so this is the public way to end the object's life.
    void Destroy();

    /// @returns The client id currently stored in this array.
    uint64 GetClientId() const
    {
        return m_clientId;
    }

    /// Assigns the client id of this array.
    ///
    /// @param [in] clientId  The new client id.
    Result SetClientId(
        uint64 clientId);

    /// @returns The value of size() on the location vector.
    Util::uint32 GetTotalSize() const
    {
        return m_locations.size();
    }

    /// @returns Read-only access to the location vector.
    const TrackedCmdLocationVec& GetLocationsVec() const
    {
        return m_locations;
    }

    /// @returns Mutable access to the location vector.
    TrackedCmdLocationVec& UseLocationsVec()
    {
        return m_locations;
    }

    /// Produces the next location and reports it through pResult.
    ///
    /// NOTE(review): presumably this appends to the vector and updates the "last location" reference - confirm
    /// against the implementation.
    ///
    /// @param [out] pResult  Receives the reference to the new location.
    Pal::Result MakeNext(
        TrackedCmdLocationRef* pResult);

    /// @returns A copy of the "last location" reference.
    const TrackedCmdLocationRef GetLast() const
    {
        return m_lastLocation;
    }

    /// @returns true if location compares equal to the "last location" reference.
    bool IsLast(
        TrackedCmdLocationRef const& location) const
    {
        return m_lastLocation == location;
    }

private:
    TrackedCmdLocationVec m_locations;    // The tracked locations themselves.
    Pal::Platform*        m_pPlatform;
    uint64                m_clientId;     // InvalidClientId after Reset().
    TrackedCmdLocationRef m_lastLocation; // (this, BadIndex) after Reset().

    // Construction and destruction are private; see CreateTrackedCmdLocationArray() and Destroy().
    TrackedCmdLocationArray(
        Pal::Platform* pPlatform);
    ~TrackedCmdLocationArray() = default;
};
} // namespace CmdDisassembly
} // namespace Pal
@@ -1,70 +1,70 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palDestroyable.h
* @brief Defines the Platform Abstraction Library (PAL) IDestroyable interface.
***********************************************************************************************************************
*/
#pragma once
namespace Pal
{
/**
***********************************************************************************************************************
* @interface IDestroyable
* @brief Interface inherited by objects that must be explicitly destroyed by the client.
*
* This includes all objects except:
*
* + @ref IColorTargetView, @ref IDepthStencilView - These classes are treated as SRDs by the DX12 runtime. Therefore,
* PAL guarantees that no action needs to be taken at Destroy() - the client should just free the memory backing these
* classes.
* + @ref IDevice - These objects are created during IPlatform::EnumerateDevices() and are automatically destroyed
* along with the Platform object.
* + @ref IPrivateScreen - These objects are created during IPlatform::EnumerateDevices() based on
* which screens are attached to each device. They are automatically destroyed along with the Platform object.
***********************************************************************************************************************
*/
class IDestroyable
{
public:
    /// Releases every resource held by this object.
    ///
    /// The client must call this only once nothing references the object any more. The system memory backing the
    /// object (the pPlacementAddr supplied at creation) is not released here; the client allocated it and remains
    /// responsible for freeing it.
    virtual void Destroy() = 0;

protected:
    /// @internal Destructor. Protected so that the delete operator cannot be used through this interface: clients
    /// end an object's life by calling IDestroyable::Destroy() and then free the object's system memory themselves.
    virtual ~IDestroyable() = default;
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palDestroyable.h
* @brief Defines the Platform Abstraction Library (PAL) IDestroyable interface.
***********************************************************************************************************************
*/
#pragma once
namespace Pal
{
/**
***********************************************************************************************************************
* @interface IDestroyable
* @brief Interface inherited by objects that must be explicitly destroyed by the client.
*
* This includes all objects except:
*
* + @ref IColorTargetView, @ref IDepthStencilView - These classes are treated as SRDs by the DX12 runtime. Therefore,
* PAL guarantees that no action needs to be taken at Destroy() - the client should just free the memory backing these
* classes.
* + @ref IDevice - These objects are created during IPlatform::EnumerateDevices() and are automatically destroyed
* along with the Platform object.
* + @ref IPrivateScreen - These objects are created during IPlatform::EnumerateDevices() based on
* which screens are attached to each device. They are automatically destroyed along with the Platform object.
***********************************************************************************************************************
*/
class IDestroyable
{
public:
    /// Releases every resource held by this object.
    ///
    /// The client must call this only once nothing references the object any more. The system memory backing the
    /// object (the pPlacementAddr supplied at creation) is not released here; the client allocated it and remains
    /// responsible for freeing it.
    virtual void Destroy() = 0;

protected:
    /// @internal Destructor. Protected so that the delete operator cannot be used through this interface: clients
    /// end an object's life by calling IDestroyable::Destroy() and then free the object's system memory themselves.
    virtual ~IDestroyable() = default;
};
} // Pal
File diff ditekan karena terlalu besar Load Diff
File diff ditekan karena terlalu besar Load Diff
@@ -1,171 +1,171 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palFence.h
* @brief Defines the Platform Abstraction Library (PAL) IFence interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
/// Specifies properties for @ref IFence creation. Input structure to IDevice::CreateFence().
struct FenceCreateInfo
{
// Creation flags, accessible either as individual bit-fields or as one packed 32-bit value (u32All).
// The bit-field widths add up to 32 (1 + 1 + 1 + 29).
union
{
struct
{
uint32 signaled : 1; ///< Specify whether the initial status of the fence is signaled or not.
uint32 eventCanBeInherited : 1; ///< The event handle can be inherited by child process.
uint32 shareable : 1; ///< This fence may be opened for use by a different device.
uint32 reserved : 29; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Fence creation flags.
#if defined(_WIN32)
const wchar_t* pName; ///< The name of the event object. Windows uses this name to uniquely identify fence objects
///< across processes.
#endif
};
/// Specifies properties for fence opening. Input structure to IDevice::OpenFence().
struct FenceOpenInfo
{
// Open flags, accessible either as individual bit-fields or as one packed 32-bit value (u32All).
// The bit-field widths add up to 32 (1 + 31).
union
{
struct
{
uint32 isReference : 1; ///< If set, then the opened fence will reference the same sync object
///< in the kernel. Otherwise, the object is copied to the new Fence.
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Fence open flags.
OsExternalHandle externalFence; ///< External shared fence handle.
#if defined(_WIN32)
const wchar_t* pName; ///< The name of the event object. Windows uses this name to uniquely identify
///< fence objects across processes.
#endif
};
/// Specifies properties for fence exporting. Input structure to IFence::ExportExternalHandle().
struct FenceExportInfo
{
// Export flags, accessible either as individual bit-fields or as one packed 32-bit value (u32All).
// The bit-field widths add up to 32 (1 + 1 + 30).
union
{
struct
{
uint32 isReference : 1; ///< If set, then the exported handle references the same sync
///< object in the kernel. Otherwise, the object is copied to the new Fence.
uint32 implicitReset : 1; ///< If set, a fence reset will be done for the sync fd exported.
uint32 reserved : 30; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Fence export flags.
};
/**
***********************************************************************************************************************
* @interface IFence
* @brief Represents a command buffer fence the client can use for coarse-level synchronization between the GPU and
* CPU.
*
* Fences can be specified when calling IQueue::Submit() and will be signaled when certain prior queue operations have
* completed. The status of the fence can be queried by the client to determine when the GPU work of interest has
* completed.
*
* Fences are guaranteed to wait for:
* + Prior command buffer submissions.
* + Prior queue semaphore signals and waits.
* + Prior direct presents.
*
* @see IDevice::CreateFence()
***********************************************************************************************************************
*/
class IFence : public IDestroyable
{
public:
    /// Queries whether the fence has completed.
    ///
    /// @returns Success when the fence has been reached, NotReady when it has not been reached yet. Any other code
    ///          reports an error:
    ///          + ErrorFenceNeverSubmitted if the fence has not been submitted yet and was not created in the
    ///            signaled state (see FenceCreateInfo::flags.signaled).
    virtual Result GetStatus() const = 0;

    /// Exports the fence's event handle or sync object handle so it can be used externally.
    /// When @ref FenceExportInfo::isReference is not set, the export also performs an implicit reset of the Fence.
    ///
    /// @param [in] exportInfo  Describes how the Fence handle should be exported.
    ///
    /// @returns The handle, as an OsExternalHandle.
    virtual OsExternalHandle ExportExternalHandle(
        const FenceExportInfo& exportInfo) const = 0;

    /// Reads back the arbitrary client data pointer attached to this object.
    /// Clients can use it to associate their own data with a particular PAL object.
    ///
    /// @returns Pointer to client data.
    void* GetClientData() const
    {
        return m_pClientData;
    }

    /// Attaches an arbitrary client data pointer to this object.
    /// Clients can use it to associate their own data with a particular PAL object.
    ///
    /// @param [in] pClientData  A pointer to arbitrary client data.
    void SetClientData(
        void* pClientData)
    {
        m_pClientData = pClientData;
    }

protected:
    /// @internal Constructor. Protected so that the new operator cannot be used on this interface; clients create
    /// objects by calling the proper create method. The client data pointer starts out null.
    IFence()
        : m_pClientData{nullptr}
    {
    }

    /// @internal Destructor. Protected so that the delete operator cannot be used on this interface; clients call
    /// IDestroyable::Destroy() explicitly and free the object's system memory themselves.
    virtual ~IFence() = default;

private:
    /// @internal Client data pointer. It may hold any value; it is read through GetClientData() and written through
    /// SetClientData(). For non-top-layer objects, this will point to the layer above the current object.
    void* m_pClientData;
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palFence.h
* @brief Defines the Platform Abstraction Library (PAL) IFence interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
/// Specifies properties for @ref IFence creation. Input structure to IDevice::CreateFence().
struct FenceCreateInfo
{
// Creation flags, accessible either as individual bit-fields or as one packed 32-bit value (u32All).
// The bit-field widths add up to 32 (1 + 1 + 1 + 29).
union
{
struct
{
uint32 signaled : 1; ///< Specify whether the initial status of the fence is signaled or not.
uint32 eventCanBeInherited : 1; ///< The event handle can be inherited by child process.
uint32 shareable : 1; ///< This fence may be opened for use by a different device.
uint32 reserved : 29; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Fence creation flags.
#if defined(_WIN32)
const wchar_t* pName; ///< The name of the event object. Windows uses this name to uniquely identify fence objects
///< across processes.
#endif
};
/// Specifies properties for fence opening. Input structure to IDevice::OpenFence().
struct FenceOpenInfo
{
// Open flags, accessible either as individual bit-fields or as one packed 32-bit value (u32All).
// The bit-field widths add up to 32 (1 + 31).
union
{
struct
{
uint32 isReference : 1; ///< If set, then the opened fence will reference the same sync object
///< in the kernel. Otherwise, the object is copied to the new Fence.
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Fence open flags.
OsExternalHandle externalFence; ///< External shared fence handle.
#if defined(_WIN32)
const wchar_t* pName; ///< The name of the event object. Windows uses this name to uniquely identify
///< fence objects across processes.
#endif
};
/// Specifies properties for fence exporting. Input structure to IFence::ExportExternalHandle().
struct FenceExportInfo
{
// Export flags, accessible either as individual bit-fields or as one packed 32-bit value (u32All).
// The bit-field widths add up to 32 (1 + 1 + 30).
union
{
struct
{
uint32 isReference : 1; ///< If set, then the exported handle references the same sync
///< object in the kernel. Otherwise, the object is copied to the new Fence.
uint32 implicitReset : 1; ///< If set, a fence reset will be done for the sync fd exported.
uint32 reserved : 30; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Fence export flags.
};
/**
***********************************************************************************************************************
* @interface IFence
* @brief Represents a command buffer fence the client can use for coarse-level synchronization between the GPU and
* CPU.
*
* Fences can be specified when calling IQueue::Submit() and will be signaled when certain prior queue operations have
* completed. The status of the fence can be queried by the client to determine when the GPU work of interest has
* completed.
*
* Fences are guaranteed to wait for:
* + Prior command buffer submissions.
* + Prior queue semaphore signals and waits.
* + Prior direct presents.
*
* @see IDevice::CreateFence()
***********************************************************************************************************************
*/
class IFence : public IDestroyable
{
public:
    /// Queries whether the fence has completed.
    ///
    /// @returns Success when the fence has been reached, NotReady when it has not been reached yet. Any other code
    ///          reports an error:
    ///          + ErrorFenceNeverSubmitted if the fence has not been submitted yet and was not created in the
    ///            signaled state (see FenceCreateInfo::flags.signaled).
    virtual Result GetStatus() const = 0;

    /// Exports the fence's event handle or sync object handle so it can be used externally.
    /// When @ref FenceExportInfo::isReference is not set, the export also performs an implicit reset of the Fence.
    ///
    /// @param [in] exportInfo  Describes how the Fence handle should be exported.
    ///
    /// @returns The handle, as an OsExternalHandle.
    virtual OsExternalHandle ExportExternalHandle(
        const FenceExportInfo& exportInfo) const = 0;

    /// Reads back the arbitrary client data pointer attached to this object.
    /// Clients can use it to associate their own data with a particular PAL object.
    ///
    /// @returns Pointer to client data.
    void* GetClientData() const
    {
        return m_pClientData;
    }

    /// Attaches an arbitrary client data pointer to this object.
    /// Clients can use it to associate their own data with a particular PAL object.
    ///
    /// @param [in] pClientData  A pointer to arbitrary client data.
    void SetClientData(
        void* pClientData)
    {
        m_pClientData = pClientData;
    }

protected:
    /// @internal Constructor. Protected so that the new operator cannot be used on this interface; clients create
    /// objects by calling the proper create method. The client data pointer starts out null.
    IFence()
        : m_pClientData{nullptr}
    {
    }

    /// @internal Destructor. Protected so that the delete operator cannot be used on this interface; clients call
    /// IDestroyable::Destroy() explicitly and free the object's system memory themselves.
    virtual ~IFence() = default;

private:
    /// @internal Client data pointer. It may hold any value; it is read through GetClientData() and written through
    /// SetClientData(). For non-top-layer objects, this will point to the layer above the current object.
    void* m_pClientData;
};
} // Pal
File diff ditekan karena terlalu besar Load Diff
File diff ditekan karena terlalu besar Load Diff
File diff ditekan karena terlalu besar Load Diff
@@ -1,139 +1,139 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palGpuMemoryBindable.h
* @brief Defines the Platform Abstraction Library (PAL) IGpuMemoryBindable interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
// Forward declarations.
class IGpuMemory;
/// Reports required properties of a GPU memory object bound to a specific object. The client must query these
/// properties via IGpuMemoryBindable::GetGpuMemoryRequirements() and bind an @ref IGpuMemory object matching these
/// requirements to the @ref IGpuMemoryBindable object using IGpuMemoryBindable::BindGpuMemory().
struct GpuMemoryRequirements
{
    /// Flags specifying required GPU memory properties.
    union
    {
        struct
        {
            /// CPU access is required.  If set, the client must not set cpuInvisible in GpuMemoryCreateFlags and
            /// must provide CPU visible heaps or CPU visible heap access mode.  If not set, it's strongly
            /// recommended to set cpuInvisible.
            uint32 cpuAccess : 1;
            /// Reserved for future use.
            uint32 reserved  : 31;
        };
        /// All of the flags viewed as a single packed 32-bit uint.
        uint32 u32All;
    } flags;

    /// Amount of GPU memory required, in bytes.
    gpusize size;
    /// Required GPU memory virtual address alignment, in bytes.
    gpusize alignment;
    /// Number of valid entries in heaps[].
    uint32  heapCount;
    /// List of allowed heaps for the GPU memory, in order of predicted performance.
    GpuHeap heaps[GpuHeapCount];
};
/**
***********************************************************************************************************************
* @interface IGpuMemoryBindable
* @brief Interface inherited by objects that may require GPU memory be bound to them.
*
* In the future, PAL may discover a need to allocate GPU memory for a class that currently doesn't require it. In that
* situation, that class will be updated to inherit from IGpuMemoryBindable. This change would break backward
* compatibility and would result in the major interface version being incremented.
***********************************************************************************************************************
*/
class IGpuMemoryBindable : public IDestroyable
{
public:
    /// Reports the GPU memory properties this object requires.  The expected flow is for the client to query the
    /// properties here, create/sub-allocate a memory range that satisfies them, and then attach that memory to the
    /// object with @ref BindGpuMemory().
    ///
    /// @note Some objects may not actually need GPU memory; for those, the reported properties reflect a 0 size and
    ///       alignment.
    ///
    /// @param [out] pGpuMemReqs  Required properties of GPU memory to be bound to this object, including properties
    ///                           like size, alignment, and allowed heaps.
    virtual void GetGpuMemoryRequirements(GpuMemoryRequirements* pGpuMemReqs) const = 0;

    /// Binds GPU memory to this object in accordance with the requirements reported by GetGpuMemoryRequirements().
    ///
    /// For objects other than images, binding memory automatically initializes the object memory as necessary.
    /// Image objects used as color or depth-stencil targets must instead be explicitly initialized in command buffers
    /// with an ICmdBuffer::CmdReleaseThenAcquire() command that transitions them out of the LayoutUninitializedTarget
    /// usage.
    ///
    /// Any previously bound memory is automatically unbound when new memory is bound, so there is no need to bind
    /// null first in order to explicitly release an earlier allocation before binding a new one.
    ///
    /// This call is invalid on objects that have no memory requirements, even if binding null.
    ///
    /// @param [in] pGpuMemory  GPU memory to be bound.  If null, the previous binding will be released.
    /// @param [in] offset      Offset into the GPU memory where the object's memory range should begin.  This allows
    ///                         sub-allocating many objects' GPU memory from the same IGpuMemory object.
    ///
    /// @returns Success if the specified GPU memory was successfully bound to the object.  Otherwise, one of the
    ///          following errors may be returned:
    ///          + ErrorUnavailable if binding a non-image to a virtual allocation.
    ///          + ErrorInvalidAlignment if the offset does not match the alignment requirements of the object.
    ///          + ErrorInvalidMemorySize if the object's required memory size does not fit completely within the
    ///            given memory object at the specified offset.
    virtual Result BindGpuMemory(IGpuMemory* pGpuMemory, gpusize offset) = 0;

    /// Retrieves the GPU memory object and offset this object is bound to, or nullptr and 0 if it is not bound.
    ///
    /// @param [out] ppGpuMemory  Address that receives the GPU memory object.  Receives nullptr if this object is not
    ///                           bound to any GPU memory.
    /// @param [out] pOffset      Address that receives the GPU memory offset.  Receives 0 if this object is not bound
    ///                           to any GPU memory.
    ///
    /// @returns Success if the GPU memory and offset were successfully returned.  Otherwise, one of the following
    ///          errors may be returned:
    ///          + ErrorGpuMemoryNotBound if this object is not bound to any GPU memory.
    ///          + ErrorInvalidPointer if either ppGpuMemory or pOffset is nullptr.
    ///          + ErrorUnavailable if binding is not supported in the derived class.
    virtual Result GetGpuMemory(IGpuMemory** ppGpuMemory, gpusize* pOffset) const = 0;

protected:
    /// @internal Destructor.  Protected so that the delete operator cannot be used on this interface; clients must
    /// destroy objects by explicitly calling IDestroyable::Destroy() and are responsible for freeing the system
    /// memory allocated for the object on their own.
    virtual ~IGpuMemoryBindable() { }
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palGpuMemoryBindable.h
* @brief Defines the Platform Abstraction Library (PAL) IGpuMemoryBindable interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
// Forward declarations.
class IGpuMemory;
/// Reports required properties of a GPU memory object bound to a specific object. The client must query these
/// properties via IGpuMemoryBindable::GetGpuMemoryRequirements() and bind an @ref IGpuMemory object matching these
/// requirements to the @ref IGpuMemoryBindable object using IGpuMemoryBindable::BindGpuMemory().
struct GpuMemoryRequirements
{
    /// Flags specifying required GPU memory properties.
    union
    {
        struct
        {
            /// CPU access is required.  If set, the client must not set cpuInvisible in GpuMemoryCreateFlags and
            /// must provide CPU visible heaps or CPU visible heap access mode.  If not set, it's strongly
            /// recommended to set cpuInvisible.
            uint32 cpuAccess : 1;
            /// Reserved for future use.
            uint32 reserved  : 31;
        };
        /// All of the flags viewed as a single packed 32-bit uint.
        uint32 u32All;
    } flags;

    /// Amount of GPU memory required, in bytes.
    gpusize size;
    /// Required GPU memory virtual address alignment, in bytes.
    gpusize alignment;
    /// Number of valid entries in heaps[].
    uint32  heapCount;
    /// List of allowed heaps for the GPU memory, in order of predicted performance.
    GpuHeap heaps[GpuHeapCount];
};
/**
***********************************************************************************************************************
* @interface IGpuMemoryBindable
* @brief Interface inherited by objects that may require GPU memory be bound to them.
*
* In the future, PAL may discover a need to allocate GPU memory for a class that currently doesn't require it. In that
* situation, that class will be updated to inherit from IGpuMemoryBindable. This change would break backward
* compatibility and would result in the major interface version being incremented.
***********************************************************************************************************************
*/
class IGpuMemoryBindable : public IDestroyable
{
public:
    /// Reports the GPU memory properties this object requires.  The expected flow is for the client to query the
    /// properties here, create/sub-allocate a memory range that satisfies them, and then attach that memory to the
    /// object with @ref BindGpuMemory().
    ///
    /// @note Some objects may not actually need GPU memory; for those, the reported properties reflect a 0 size and
    ///       alignment.
    ///
    /// @param [out] pGpuMemReqs  Required properties of GPU memory to be bound to this object, including properties
    ///                           like size, alignment, and allowed heaps.
    virtual void GetGpuMemoryRequirements(GpuMemoryRequirements* pGpuMemReqs) const = 0;

    /// Binds GPU memory to this object in accordance with the requirements reported by GetGpuMemoryRequirements().
    ///
    /// For objects other than images, binding memory automatically initializes the object memory as necessary.
    /// Image objects used as color or depth-stencil targets must instead be explicitly initialized in command buffers
    /// with an ICmdBuffer::CmdReleaseThenAcquire() command that transitions them out of the LayoutUninitializedTarget
    /// usage.
    ///
    /// Any previously bound memory is automatically unbound when new memory is bound, so there is no need to bind
    /// null first in order to explicitly release an earlier allocation before binding a new one.
    ///
    /// This call is invalid on objects that have no memory requirements, even if binding null.
    ///
    /// @param [in] pGpuMemory  GPU memory to be bound.  If null, the previous binding will be released.
    /// @param [in] offset      Offset into the GPU memory where the object's memory range should begin.  This allows
    ///                         sub-allocating many objects' GPU memory from the same IGpuMemory object.
    ///
    /// @returns Success if the specified GPU memory was successfully bound to the object.  Otherwise, one of the
    ///          following errors may be returned:
    ///          + ErrorUnavailable if binding a non-image to a virtual allocation.
    ///          + ErrorInvalidAlignment if the offset does not match the alignment requirements of the object.
    ///          + ErrorInvalidMemorySize if the object's required memory size does not fit completely within the
    ///            given memory object at the specified offset.
    virtual Result BindGpuMemory(IGpuMemory* pGpuMemory, gpusize offset) = 0;

    /// Retrieves the GPU memory object and offset this object is bound to, or nullptr and 0 if it is not bound.
    ///
    /// @param [out] ppGpuMemory  Address that receives the GPU memory object.  Receives nullptr if this object is not
    ///                           bound to any GPU memory.
    /// @param [out] pOffset      Address that receives the GPU memory offset.  Receives 0 if this object is not bound
    ///                           to any GPU memory.
    ///
    /// @returns Success if the GPU memory and offset were successfully returned.  Otherwise, one of the following
    ///          errors may be returned:
    ///          + ErrorGpuMemoryNotBound if this object is not bound to any GPU memory.
    ///          + ErrorInvalidPointer if either ppGpuMemory or pOffset is nullptr.
    ///          + ErrorUnavailable if binding is not supported in the derived class.
    virtual Result GetGpuMemory(IGpuMemory** ppGpuMemory, gpusize* pOffset) const = 0;

protected:
    /// @internal Destructor.  Protected so that the delete operator cannot be used on this interface; clients must
    /// destroy objects by explicitly calling IDestroyable::Destroy() and are responsible for freeing the system
    /// memory allocated for the object on their own.
    virtual ~IGpuMemoryBindable() { }
};
} // Pal
File diff ditekan karena terlalu besar Load Diff
File diff ditekan karena terlalu besar Load Diff
@@ -1,187 +1,187 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palMsaaState.h
* @brief Defines the Platform Abstraction Library (PAL) IMsaaState interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
/// Specifies conservative rasterization mode
enum class ConservativeRasterizationMode : uint8
{
    /// Fragments will be generated if the primitive area covers any portion of the pixel.
    Overestimate  = 0,
    /// Fragments will be generated if all of the pixel is covered by the primitive.
    Underestimate = 1,
    /// Implicitly 2, i.e. one past the last mode listed above.
    Count
};
/// Upper bound on the supported number of MSAA color samples.
constexpr uint32 MaxMsaaColorSamples = 16;

/// Upper bound on the supported number of MSAA depth samples.
constexpr uint32 MaxMsaaDepthSamples = 8;

/// Upper bound on the supported number of MSAA fragments.
constexpr uint32 MaxMsaaFragments = 8;

/// Size of the sampling pattern grid: a quad of pixels, i.e. a 2x2 grid of pixels.
constexpr Extent2d MaxGridSize = { 2, 2 };

/// Number of sub-pixel bits; sample positions are rounded to 1/Pow2(SubPixelBits).
constexpr uint32 SubPixelBits = 4;

/// Every pixel is subdivided into a Pow2(SubPixelBits) x Pow2(SubPixelBits) grid of possible sample locations
/// (16 x 16 with SubPixelBits == 4).
constexpr Extent2d SubPixelGridSize = { 1u << SubPixelBits, 1u << SubPixelBits };
/// Represents a 2D coordinate with each component in [-8/16, 7/16]
struct SampleLocation
{
    /// X offset.
    int8 x;
    /// Y offset.
    int8 y;

    /// Implicit conversion to Offset2d; the 8-bit signed components are sign-extended by the conversion.
    operator Offset2d() const
    {
        return Offset2d{ x, y };
    }
};
/// Specifies a custom multisample pattern for a pixel quad.
struct MsaaQuadSamplePattern
{
    /// Sample locations for the top-left (TL) pixel of the quad.
    SampleLocation topLeft[MaxMsaaRasterizerSamples];
    /// Sample locations for the top-right (TR) pixel of the quad.
    SampleLocation topRight[MaxMsaaRasterizerSamples];
    /// Sample locations for the bottom-left (BL) pixel of the quad.
    SampleLocation bottomLeft[MaxMsaaRasterizerSamples];
    /// Sample locations for the bottom-right (BR) pixel of the quad.
    SampleLocation bottomRight[MaxMsaaRasterizerSamples];
};
/// Specifies properties for creation of an @ref IMsaaState object. Input structure to IDevice::CreateMsaaState().
struct MsaaStateCreateInfo
{
    /// Number of rasterizer samples.  Must be greater than or equal to all sample rates in the pipeline.  Valid
    /// values are 1, 2, 4, 8, and 16.
    uint8 coverageSamples;

    /// Number of samples exposed in the pixel shader coverage mask.  Must be less than or equal to coverageSamples.
    /// Valid values are 1, 2, 4, and 8.
    uint8 exposedSamples;

    /// Controls the pixel shader execution rate.  Must be less than or equal to coverageSamples.  Valid values are
    /// 1, 2, 4, and 8.  Note that a value greater than 1 doesn't mean sample rate shading is enabled; sample rate
    /// shading is enabled by either @ref forceSampleRateShading or the pixel shader.
    uint8 pixelShaderSamples;

    /// Number of samples in the bound depth target.  Must be less than or equal to coverageSamples.  Valid values
    /// are 1, 2, 4, and 8.
    uint8 depthStencilSamples;

    /// Number of samples to use in the shader export mask.  Should match the number of color target fragments
    /// clamped to @ref DeviceProperties imageProperties.maxMsaaFragments.
    uint8 shaderExportMaskSamples;

    /// Number of sample clusters to control over-rasterization (all samples in a cluster are rasterized if any are
    /// hit).  Must be less than or equal to coverageSamples.  Valid values are 1, 2, 4, and 8.
    uint8 sampleClusters;

    /// How many samples of quality to generate with alpha-to-coverage.  Must be less than or equal to
    /// coverageSamples.  Valid values are 1, 2, 4, 8, and 16.
    uint8 alphaToCoverageSamples;

    /// Controls the number of samples to use for occlusion queries.  This value must never exceed the MSAA rate.
    uint8 occlusionQuerySamples;

    /// Bitmask of which color target and depth/stencil samples should be updated.  The lowest bit corresponds to
    /// sample 0.
    uint16 sampleMask;

    /// Selects overestimate or underestimate conservative rasterization mode.  Used only if
    /// @ref MsaaStateCreateInfo::flags::enableConservativeRasterization is set to true.
    ConservativeRasterizationMode conservativeRasterizationMode;

    /// Boolean creation options, accessible individually or as one packed byte.
    union
    {
        struct
        {
            /// Set to true to enable conservative rasterization.
            uint8 enableConservativeRasterization : 1;
            /// Set to true to enable 1xMSAA quad sample pattern.
            uint8 enable1xMsaaSampleLocations     : 1;
            /// Disables coverage dithering.
            uint8 disableAlphaToCoverageDither    : 1;
            /// Set to true to enable line stippling.
            uint8 enableLineStipple               : 1;
            /// Sample rate shading can be enabled by either the pixel shader, or forced here with
            /// forceSampleRateShading = 1.  Value 0 means sample rate shading is decided by the pixel shader and
            /// value 1 means sample rate shading is forced enabled.  This bit is for openGL glMinSampleShading,
            /// where sample rate shading can be enabled by glEnable(GL_SAMPLE_SHADING) instead of by the pixel
            /// shader.
            uint8 forceSampleRateShading          : 1;
            /// Reserved for future use.
            uint8 reserved                        : 3;
        };
        /// All of the flags viewed as a single packed 8-bit uint.
        uint8 u8All;
    } flags;
};
/**
***********************************************************************************************************************
* @interface IMsaaState
* @brief Dynamic state object controlling fixed function MSAA state.
*
* Configures sample counts of various portions of the pipeline, specifies sample positions, etc. The full range of
* EQAA hardware features is exposed.
*
* @see IDevice::CreateMsaaState
***********************************************************************************************************************
*/
class IMsaaState : public IDestroyable
{
public:
    /// Retrieves the arbitrary client data pointer currently associated with this object.  Clients can use it to
    /// attach their own data to a particular PAL object.
    ///
    /// @returns Pointer to client data.
    void* GetClientData() const { return m_pClientData; }

    /// Replaces the arbitrary client data pointer associated with this object.  Clients can use it to attach their
    /// own data to a particular PAL object.
    ///
    /// @param [in] pClientData  A pointer to arbitrary client data.
    void SetClientData(void* pClientData) { m_pClientData = pClientData; }

protected:
    /// @internal Constructor.  Protected so that the new operator cannot be used on this interface; clients must
    /// create objects by explicitly calling the proper create method.
    IMsaaState() : m_pClientData(nullptr) { }

    /// @internal Destructor.  Protected so that the delete operator cannot be used on this interface; clients must
    /// destroy objects by explicitly calling IDestroyable::Destroy() and are responsible for freeing the system
    /// memory allocated for the object on their own.
    virtual ~IMsaaState() { }

private:
    /// @internal Client data pointer.  Holds an arbitrary value that is read back with GetClientData() and written
    /// with SetClientData().  For non-top-layer objects, this will point to the layer above the current object.
    void* m_pClientData;
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palMsaaState.h
* @brief Defines the Platform Abstraction Library (PAL) IMsaaState interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
namespace Pal
{
/// Specifies conservative rasterization mode
enum class ConservativeRasterizationMode : uint8
{
    /// Fragments will be generated if the primitive area covers any portion of the pixel.
    Overestimate  = 0,
    /// Fragments will be generated if all of the pixel is covered by the primitive.
    Underestimate = 1,
    /// Implicitly 2, i.e. one past the last mode listed above.
    Count
};
/// Upper bound on the supported number of MSAA color samples.
constexpr uint32 MaxMsaaColorSamples = 16;

/// Upper bound on the supported number of MSAA depth samples.
constexpr uint32 MaxMsaaDepthSamples = 8;

/// Upper bound on the supported number of MSAA fragments.
constexpr uint32 MaxMsaaFragments = 8;

/// Size of the sampling pattern grid: a quad of pixels, i.e. a 2x2 grid of pixels.
constexpr Extent2d MaxGridSize = { 2, 2 };

/// Number of sub-pixel bits; sample positions are rounded to 1/Pow2(SubPixelBits).
constexpr uint32 SubPixelBits = 4;

/// Every pixel is subdivided into a Pow2(SubPixelBits) x Pow2(SubPixelBits) grid of possible sample locations
/// (16 x 16 with SubPixelBits == 4).
constexpr Extent2d SubPixelGridSize = { 1u << SubPixelBits, 1u << SubPixelBits };
/// Represents a 2D coordinate with each component in [-8/16, 7/16]
struct SampleLocation
{
    /// X offset.
    int8 x;
    /// Y offset.
    int8 y;

    /// Implicit conversion to Offset2d; the 8-bit signed components are sign-extended by the conversion.
    operator Offset2d() const
    {
        return Offset2d{ x, y };
    }
};
/// Specifies a custom multisample pattern for a pixel quad.
struct MsaaQuadSamplePattern
{
    /// Sample locations for the top-left (TL) pixel of the quad.
    SampleLocation topLeft[MaxMsaaRasterizerSamples];
    /// Sample locations for the top-right (TR) pixel of the quad.
    SampleLocation topRight[MaxMsaaRasterizerSamples];
    /// Sample locations for the bottom-left (BL) pixel of the quad.
    SampleLocation bottomLeft[MaxMsaaRasterizerSamples];
    /// Sample locations for the bottom-right (BR) pixel of the quad.
    SampleLocation bottomRight[MaxMsaaRasterizerSamples];
};
/// Specifies properties for creation of an @ref IMsaaState object. Input structure to IDevice::CreateMsaaState().
struct MsaaStateCreateInfo
{
    /// Number of rasterizer samples.  Must be greater than or equal to all sample rates in the pipeline.  Valid
    /// values are 1, 2, 4, 8, and 16.
    uint8 coverageSamples;

    /// Number of samples exposed in the pixel shader coverage mask.  Must be less than or equal to coverageSamples.
    /// Valid values are 1, 2, 4, and 8.
    uint8 exposedSamples;

    /// Controls the pixel shader execution rate.  Must be less than or equal to coverageSamples.  Valid values are
    /// 1, 2, 4, and 8.  Note that a value greater than 1 doesn't mean sample rate shading is enabled; sample rate
    /// shading is enabled by either @ref forceSampleRateShading or the pixel shader.
    uint8 pixelShaderSamples;

    /// Number of samples in the bound depth target.  Must be less than or equal to coverageSamples.  Valid values
    /// are 1, 2, 4, and 8.
    uint8 depthStencilSamples;

    /// Number of samples to use in the shader export mask.  Should match the number of color target fragments
    /// clamped to @ref DeviceProperties imageProperties.maxMsaaFragments.
    uint8 shaderExportMaskSamples;

    /// Number of sample clusters to control over-rasterization (all samples in a cluster are rasterized if any are
    /// hit).  Must be less than or equal to coverageSamples.  Valid values are 1, 2, 4, and 8.
    uint8 sampleClusters;

    /// How many samples of quality to generate with alpha-to-coverage.  Must be less than or equal to
    /// coverageSamples.  Valid values are 1, 2, 4, 8, and 16.
    uint8 alphaToCoverageSamples;

    /// Controls the number of samples to use for occlusion queries.  This value must never exceed the MSAA rate.
    uint8 occlusionQuerySamples;

    /// Bitmask of which color target and depth/stencil samples should be updated.  The lowest bit corresponds to
    /// sample 0.
    uint16 sampleMask;

    /// Selects overestimate or underestimate conservative rasterization mode.  Used only if
    /// @ref MsaaStateCreateInfo::flags::enableConservativeRasterization is set to true.
    ConservativeRasterizationMode conservativeRasterizationMode;

    /// Boolean creation options, accessible individually or as one packed byte.
    union
    {
        struct
        {
            /// Set to true to enable conservative rasterization.
            uint8 enableConservativeRasterization : 1;
            /// Set to true to enable 1xMSAA quad sample pattern.
            uint8 enable1xMsaaSampleLocations     : 1;
            /// Disables coverage dithering.
            uint8 disableAlphaToCoverageDither    : 1;
            /// Set to true to enable line stippling.
            uint8 enableLineStipple               : 1;
            /// Sample rate shading can be enabled by either the pixel shader, or forced here with
            /// forceSampleRateShading = 1.  Value 0 means sample rate shading is decided by the pixel shader and
            /// value 1 means sample rate shading is forced enabled.  This bit is for openGL glMinSampleShading,
            /// where sample rate shading can be enabled by glEnable(GL_SAMPLE_SHADING) instead of by the pixel
            /// shader.
            uint8 forceSampleRateShading          : 1;
            /// Reserved for future use.
            uint8 reserved                        : 3;
        };
        /// All of the flags viewed as a single packed 8-bit uint.
        uint8 u8All;
    } flags;
};
/**
***********************************************************************************************************************
* @interface IMsaaState
* @brief Dynamic state object controlling fixed function MSAA state.
*
* Configures sample counts of various portions of the pipeline, specifies sample positions, etc. The full range of
* EQAA hardware features is exposed.
*
* @see IDevice::CreateMsaaState
***********************************************************************************************************************
*/
class IMsaaState : public IDestroyable
{
public:
    /// Retrieves the arbitrary client data pointer currently associated with this object.  Clients can use it to
    /// attach their own data to a particular PAL object.
    ///
    /// @returns Pointer to client data.
    void* GetClientData() const { return m_pClientData; }

    /// Replaces the arbitrary client data pointer associated with this object.  Clients can use it to attach their
    /// own data to a particular PAL object.
    ///
    /// @param [in] pClientData  A pointer to arbitrary client data.
    void SetClientData(void* pClientData) { m_pClientData = pClientData; }

protected:
    /// @internal Constructor.  Protected so that the new operator cannot be used on this interface; clients must
    /// create objects by explicitly calling the proper create method.
    IMsaaState() : m_pClientData(nullptr) { }

    /// @internal Destructor.  Protected so that the delete operator cannot be used on this interface; clients must
    /// destroy objects by explicitly calling IDestroyable::Destroy() and are responsible for freeing the system
    /// memory allocated for the object on their own.
    virtual ~IMsaaState() { }

private:
    /// @internal Client data pointer.  Holds an arbitrary value that is read back with GetClientData() and written
    /// with SetClientData().  For non-top-layer objects, this will point to the layer above the current object.
    void* m_pClientData;
};
} // Pal
File diff ditekan karena terlalu besar Load Diff
File diff ditekan karena terlalu besar Load Diff
File diff ditekan karena terlalu besar Load Diff
@@ -1,234 +1,234 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palQueryPool.h
* @brief Defines the Platform Abstraction Library (PAL) IQueryPool interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palGpuMemoryBindable.h"
namespace Pal
{
/// Specifies a category of GPU query pool. A pool's category is chosen at creation time through
/// QueryPoolCreateInfo::queryPoolType.
enum class QueryPoolType : uint32
{
Occlusion = 0x0, ///< Occlusion query pool. Supports queries based on the Z test.
PipelineStats = 0x1, ///< Pipeline stats query pool. Supports queries based on statistics from the GPU's execution
/// such as a count of prims generated, shader invocations, etc.
StreamoutStats = 0x2, ///< Streamout query pool. Supports queries based on statistics from the GPU's execution
/// such as number of primitives written to SO buffer and storage needed.
Count, ///< Number of pool types declared above (implicitly 0x3 because it directly follows StreamoutStats).
};
/// Specifies what data a query slot must produce. Some query pool types support multiple query types.
enum class QueryType : uint32
{
Occlusion = 0x0, ///< The total passes recorded by the Z test.
BinaryOcclusion = 0x1, ///< One if there were one or more Z test passes, zero otherwise.
PipelineStats = 0x2, ///< The total statistics selected by the given pipeline stats query pool.
StreamoutStats = 0x3, ///< SO statistics tracked by CP/VGT including primitives written and storage needed.
StreamoutStats1 = 0x4, ///< SO1 statistics tracked by CP/VGT including primitives written and storage needed.
StreamoutStats2 = 0x5, ///< SO2 statistics tracked by CP/VGT including primitives written and storage needed.
StreamoutStats3 = 0x6, ///< SO3 statistics tracked by CP/VGT including primitives written and storage needed.
Count, ///< Number of query types declared above (implicitly 0x7 because it directly follows StreamoutStats3).
};
/// Bitmask selecting which statistics a pipeline stats query pool tracks. Every flag owns exactly one bit, so any
/// combination of flags can be ORed together into a single uint32 mask.
enum QueryPipelineStatsFlags : uint32
{
    QueryPipelineStatsIaVertices    = 1u << 0,  ///< Input vertices.
    QueryPipelineStatsIaPrimitives  = 1u << 1,  ///< Input primitives.
    QueryPipelineStatsVsInvocations = 1u << 2,  ///< Vertex shader invocations.
    QueryPipelineStatsGsInvocations = 1u << 3,  ///< Geometry shader invocations.
    QueryPipelineStatsGsPrimitives  = 1u << 4,  ///< Geometry shader primitives.
    QueryPipelineStatsCInvocations  = 1u << 5,  ///< Clipper invocations.
    QueryPipelineStatsCPrimitives   = 1u << 6,  ///< Clipper primitives.
    QueryPipelineStatsPsInvocations = 1u << 7,  ///< Pixel shader invocations.
    QueryPipelineStatsHsInvocations = 1u << 8,  ///< Hull shader invocations.
    QueryPipelineStatsDsInvocations = 1u << 9,  ///< Domain shader invocations.
    QueryPipelineStatsCsInvocations = 1u << 10, ///< Compute shader invocations.
    QueryPipelineStatsTsInvocations = 1u << 11, ///< Task shader invocations.
    QueryPipelineStatsMsInvocations = 1u << 12, ///< Mesh shader invocations.
    QueryPipelineStatsMsPrimitives  = 1u << 13, ///< Mesh shader primitives.

    /// All of the above stats. Spelled as the OR of every flag so the mask cannot drift from the list; the
    /// resulting value is 0x3FFF.
    QueryPipelineStatsAll = QueryPipelineStatsIaVertices    |
                            QueryPipelineStatsIaPrimitives  |
                            QueryPipelineStatsVsInvocations |
                            QueryPipelineStatsGsInvocations |
                            QueryPipelineStatsGsPrimitives  |
                            QueryPipelineStatsCInvocations  |
                            QueryPipelineStatsCPrimitives   |
                            QueryPipelineStatsPsInvocations |
                            QueryPipelineStatsHsInvocations |
                            QueryPipelineStatsDsInvocations |
                            QueryPipelineStatsCsInvocations |
                            QueryPipelineStatsTsInvocations |
                            QueryPipelineStatsMsInvocations |
                            QueryPipelineStatsMsPrimitives
};
/// Specifies properties for @ref IQueryPool creation. Input structure to IDevice::CreateQueryPool().
struct QueryPoolCreateInfo
{
QueryPoolType queryPoolType; ///< Type of query pool to create (e.g., occlusion vs. pipeline stats).
uint32 numSlots; ///< Number of slots in the query pool.
uint32 enabledStats; ///< An ORed mask of stats flags specific to the query pool type.
/// @see QueryPipelineStatsFlags for PipelineStats query pools.
/// Creation flags. The named bit-fields (1 + 31 = 32 bits) overlay u32All, so all flags can be read or written at
/// once through u32All.
union
{
struct
{
/// If true, this query pool can have results retrieved using the CPU (using @ref IQueryPool::GetResults)
/// and can be reset using the CPU (using @ref IQueryPool::Reset). Otherwise, the client must use command
/// buffers to perform these operations (using @ref ICmdBuffer::CmdResetQueryPool and
/// @ref ICmdBuffer::CmdResolveQuery).
uint32 enableCpuAccess : 1;
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed together as a uint32.
} flags; ///< Flags controlling QueryPool behavior.
};
/// Bitmask controlling operations that compute query results. Apart from the zero-valued default, every flag owns
/// exactly one bit so that flags can be ORed together.
enum QueryResultFlags : uint32
{
    /// Default to 32-bit results with no waiting.
    QueryResultDefault          = 0u,

    /// Store all results as 64-bit values.
    QueryResult64Bit            = 1u << 0,

    /// Wait for the queries to finish when computing the results.
    QueryResultWait             = 1u << 1,

    /// If the results of a query are available at computation time a one will be written as a separate value after
    /// the result value, if the results were not available a zero will be written.
    QueryResultAvailability     = 1u << 2,

    /// If the final result of a query would be unavailable, then return a result for that query between 0 and what
    /// the final result would be.
    QueryResultPartial          = 1u << 3,

    /// Results are added to the values present in the destination, if availability data is enabled it will be ANDed
    /// with the present availability data.
    QueryResultAccumulate       = 1u << 4,

    /// Prefer a shader resolve path over a command processor path.
    QueryResultPreferShaderPath = 1u << 5,

    /// Select only primitives storage needed in Streamout query results.
    QueryResultOnlyPrimNeeded   = 1u << 6,

    /// Clients should NOT use it, for internal static_assert purpose only. Spelled as the OR of every flag above;
    /// the resulting value is 0x7F.
    QueryResultAll = QueryResultDefault          |
                     QueryResult64Bit            |
                     QueryResultWait             |
                     QueryResultAvailability     |
                     QueryResultPartial          |
                     QueryResultAccumulate       |
                     QueryResultPreferShaderPath |
                     QueryResultOnlyPrimNeeded
};
/**
***********************************************************************************************************************
* @interface IQueryPool
* @brief Represents a set of queries that can be used to retrieve detailed info about the GPU's execution of a
* particular range of a command buffer.
*
* Currently, only occlusion queries and pipeline statistic queries are supported. All queries in a pool are the same
* type.
*
* @note NOTE(review): @ref QueryPoolType in this header also declares StreamoutStats, so the "only occlusion and
* pipeline statistic" statement above may be out of date - confirm.
*
* @see IDevice::CreateQueryPool()
***********************************************************************************************************************
*/
class IQueryPool : public IGpuMemoryBindable
{
public:
/// Retrieves query results from a query pool.
///
/// Multiple consecutive query results can be retrieved with one call.
///
/// @param [in] flags Flags that control the result data layout and how the results are retrieved.
/// @param [in] queryType Specifies what data the query slots must produce.
/// @param [in] startQuery First query pool slot to retrieve data for.
/// @param [in] queryCount Number of query pool slots to retrieve data for.
/// @param [in] pMappedGpuAddr Specify the query buffer mapped address. If the parameter equals nullptr,
/// this method will use Map/UnMap to access the data.
/// @param [in,out] pDataSize Input value specifies the available size in pData in bytes; output value reports the
/// number of bytes required to hold all result data.
/// @param [out] pData Location where the query results should be written. Can be null in order to query the
/// required size. The data returned depends on the query pool type and flags. All data
/// entries are either uint32 or uint64 integers. One or more type-specific entries will
/// be optionally followed by one entry for availability. The type-specific data is:<br>
/// + QueryOcclusion: One entry to store the zPass count.
/// + QueryPipelineStats: One entry per statistic enabled in the create info. The stats
/// will be written in the appropriate order for each PAL client.
/// @param [in] stride Stride in bytes between subsequent query result data or zero to request tightly
/// packed result data.
///
/// @returns Success if query results were successfully returned in pData, or NotReady if any of the requested query
/// slots does not yet have results available. Otherwise, one of the following error codes may be
/// returned:
/// + ErrorInvalidValue if the range defined by startQuery and queryCount is not valid for this query pool.
/// + ErrorGpuMemoryNotBound if the query pool requires GPU memory but none is bound.
/// + ErrorInvalidMemorySize if pData is non-null and the value stored in pDataSize is too small.
virtual Result GetResults(
QueryResultFlags flags,
QueryType queryType,
uint32 startQuery,
uint32 queryCount,
const void* pMappedGpuAddr,
size_t* pDataSize,
void* pData,
size_t stride) = 0;
/// Use CPU to reset the query pool slots.
///
/// Supported for occlusion and video decode statistics query pools.
///
/// @note NOTE(review): @ref QueryPoolType in this header declares no video decode statistics pool type, so the
/// sentence above may be stale - confirm which pool types support a CPU reset.
///
/// @param [in] startQuery First query pool slot to reset.
/// @param [in] queryCount Number of query pool slots to reset.
/// @param [in] pMappedCpuAddr Specify the query buffer mapped address. If the parameter equals nullptr,
/// this method will use Map/UnMap to access the data.
///
/// @returns Success if the reset was successfully performed.
virtual Result Reset(
uint32 startQuery,
uint32 queryCount,
void* pMappedCpuAddr) = 0;
/// Returns the distance, in bytes, between successive query slots in the bound GPU memory.
/// This method is only supported for @ref QueryPoolType::VideoDecodeStats
///
/// @note NOTE(review): QueryPoolType as declared in this header has no VideoDecodeStats enumerator, so the @ref
/// above cannot resolve - confirm which pool types actually support this method.
///
/// @returns the distance, in bytes, between successive query slots in the bound GPU memory.
virtual gpusize GetQuerySlotStride() const = 0;
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data. This is nullptr until SetClientData() has been called, because the constructor
/// initializes the pointer to nullptr.
void* GetClientData() const
{
return m_pClientData;
}
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData A pointer to arbitrary client data. The pointer value is stored as-is and is handed
/// back unchanged by GetClientData().
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method. The client data pointer starts out as nullptr.
IQueryPool() : m_pClientData(nullptr) {}
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IQueryPool() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palQueryPool.h
* @brief Defines the Platform Abstraction Library (PAL) IQueryPool interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palGpuMemoryBindable.h"
namespace Pal
{
/// Specifies a category of GPU query pool. A pool's category is chosen at creation time through
/// QueryPoolCreateInfo::queryPoolType.
enum class QueryPoolType : uint32
{
Occlusion = 0x0, ///< Occlusion query pool. Supports queries based on the Z test.
PipelineStats = 0x1, ///< Pipeline stats query pool. Supports queries based on statistics from the GPU's execution
/// such as a count of prims generated, shader invocations, etc.
StreamoutStats = 0x2, ///< Streamout query pool. Supports queries based on statistics from the GPU's execution
/// such as number of primitives written to SO buffer and storage needed.
Count, ///< Number of pool types declared above (implicitly 0x3 because it directly follows StreamoutStats).
};
/// Specifies what data a query slot must produce. Some query pool types support multiple query types.
enum class QueryType : uint32
{
Occlusion = 0x0, ///< The total passes recorded by the Z test.
BinaryOcclusion = 0x1, ///< One if there were one or more Z test passes, zero otherwise.
PipelineStats = 0x2, ///< The total statistics selected by the given pipeline stats query pool.
StreamoutStats = 0x3, ///< SO statistics tracked by CP/VGT including primitives written and storage needed.
StreamoutStats1 = 0x4, ///< SO1 statistics tracked by CP/VGT including primitives written and storage needed.
StreamoutStats2 = 0x5, ///< SO2 statistics tracked by CP/VGT including primitives written and storage needed.
StreamoutStats3 = 0x6, ///< SO3 statistics tracked by CP/VGT including primitives written and storage needed.
Count, ///< Number of query types declared above (implicitly 0x7 because it directly follows StreamoutStats3).
};
/// Bitmask selecting which statistics a pipeline stats query pool tracks. Every flag owns exactly one bit, so any
/// combination of flags can be ORed together into a single uint32 mask.
enum QueryPipelineStatsFlags : uint32
{
    QueryPipelineStatsIaVertices    = 1u << 0,  ///< Input vertices.
    QueryPipelineStatsIaPrimitives  = 1u << 1,  ///< Input primitives.
    QueryPipelineStatsVsInvocations = 1u << 2,  ///< Vertex shader invocations.
    QueryPipelineStatsGsInvocations = 1u << 3,  ///< Geometry shader invocations.
    QueryPipelineStatsGsPrimitives  = 1u << 4,  ///< Geometry shader primitives.
    QueryPipelineStatsCInvocations  = 1u << 5,  ///< Clipper invocations.
    QueryPipelineStatsCPrimitives   = 1u << 6,  ///< Clipper primitives.
    QueryPipelineStatsPsInvocations = 1u << 7,  ///< Pixel shader invocations.
    QueryPipelineStatsHsInvocations = 1u << 8,  ///< Hull shader invocations.
    QueryPipelineStatsDsInvocations = 1u << 9,  ///< Domain shader invocations.
    QueryPipelineStatsCsInvocations = 1u << 10, ///< Compute shader invocations.
    QueryPipelineStatsTsInvocations = 1u << 11, ///< Task shader invocations.
    QueryPipelineStatsMsInvocations = 1u << 12, ///< Mesh shader invocations.
    QueryPipelineStatsMsPrimitives  = 1u << 13, ///< Mesh shader primitives.

    /// All of the above stats. Spelled as the OR of every flag so the mask cannot drift from the list; the
    /// resulting value is 0x3FFF.
    QueryPipelineStatsAll = QueryPipelineStatsIaVertices    |
                            QueryPipelineStatsIaPrimitives  |
                            QueryPipelineStatsVsInvocations |
                            QueryPipelineStatsGsInvocations |
                            QueryPipelineStatsGsPrimitives  |
                            QueryPipelineStatsCInvocations  |
                            QueryPipelineStatsCPrimitives   |
                            QueryPipelineStatsPsInvocations |
                            QueryPipelineStatsHsInvocations |
                            QueryPipelineStatsDsInvocations |
                            QueryPipelineStatsCsInvocations |
                            QueryPipelineStatsTsInvocations |
                            QueryPipelineStatsMsInvocations |
                            QueryPipelineStatsMsPrimitives
};
/// Specifies properties for @ref IQueryPool creation. Input structure to IDevice::CreateQueryPool().
struct QueryPoolCreateInfo
{
QueryPoolType queryPoolType; ///< Type of query pool to create (e.g., occlusion vs. pipeline stats).
uint32 numSlots; ///< Number of slots in the query pool.
uint32 enabledStats; ///< An ORed mask of stats flags specific to the query pool type.
/// @see QueryPipelineStatsFlags for PipelineStats query pools.
/// Creation flags. The named bit-fields (1 + 31 = 32 bits) overlay u32All, so all flags can be read or written at
/// once through u32All.
union
{
struct
{
/// If true, this query pool can have results retrieved using the CPU (using @ref IQueryPool::GetResults)
/// and can be reset using the CPU (using @ref IQueryPool::Reset). Otherwise, the client must use command
/// buffers to perform these operations (using @ref ICmdBuffer::CmdResetQueryPool and
/// @ref ICmdBuffer::CmdResolveQuery).
uint32 enableCpuAccess : 1;
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed together as a uint32.
} flags; ///< Flags controlling QueryPool behavior.
};
/// Bitmask controlling operations that compute query results. Apart from the zero-valued default, every flag owns
/// exactly one bit so that flags can be ORed together.
enum QueryResultFlags : uint32
{
    /// Default to 32-bit results with no waiting.
    QueryResultDefault          = 0u,

    /// Store all results as 64-bit values.
    QueryResult64Bit            = 1u << 0,

    /// Wait for the queries to finish when computing the results.
    QueryResultWait             = 1u << 1,

    /// If the results of a query are available at computation time a one will be written as a separate value after
    /// the result value, if the results were not available a zero will be written.
    QueryResultAvailability     = 1u << 2,

    /// If the final result of a query would be unavailable, then return a result for that query between 0 and what
    /// the final result would be.
    QueryResultPartial          = 1u << 3,

    /// Results are added to the values present in the destination, if availability data is enabled it will be ANDed
    /// with the present availability data.
    QueryResultAccumulate       = 1u << 4,

    /// Prefer a shader resolve path over a command processor path.
    QueryResultPreferShaderPath = 1u << 5,

    /// Select only primitives storage needed in Streamout query results.
    QueryResultOnlyPrimNeeded   = 1u << 6,

    /// Clients should NOT use it, for internal static_assert purpose only. Spelled as the OR of every flag above;
    /// the resulting value is 0x7F.
    QueryResultAll = QueryResultDefault          |
                     QueryResult64Bit            |
                     QueryResultWait             |
                     QueryResultAvailability     |
                     QueryResultPartial          |
                     QueryResultAccumulate       |
                     QueryResultPreferShaderPath |
                     QueryResultOnlyPrimNeeded
};
/**
***********************************************************************************************************************
* @interface IQueryPool
* @brief Represents a set of queries that can be used to retrieve detailed info about the GPU's execution of a
* particular range of a command buffer.
*
* Currently, only occlusion queries and pipeline statistic queries are supported. All queries in a pool are the same
* type.
*
* @note NOTE(review): @ref QueryPoolType in this header also declares StreamoutStats, so the "only occlusion and
* pipeline statistic" statement above may be out of date - confirm.
*
* @see IDevice::CreateQueryPool()
***********************************************************************************************************************
*/
class IQueryPool : public IGpuMemoryBindable
{
public:
/// Retrieves query results from a query pool.
///
/// Multiple consecutive query results can be retrieved with one call.
///
/// @param [in] flags Flags that control the result data layout and how the results are retrieved.
/// @param [in] queryType Specifies what data the query slots must produce.
/// @param [in] startQuery First query pool slot to retrieve data for.
/// @param [in] queryCount Number of query pool slots to retrieve data for.
/// @param [in] pMappedGpuAddr Specify the query buffer mapped address. If the parameter equals nullptr,
/// this method will use Map/UnMap to access the data.
/// @param [in,out] pDataSize Input value specifies the available size in pData in bytes; output value reports the
/// number of bytes required to hold all result data.
/// @param [out] pData Location where the query results should be written. Can be null in order to query the
/// required size. The data returned depends on the query pool type and flags. All data
/// entries are either uint32 or uint64 integers. One or more type-specific entries will
/// be optionally followed by one entry for availability. The type-specific data is:<br>
/// + QueryOcclusion: One entry to store the zPass count.
/// + QueryPipelineStats: One entry per statistic enabled in the create info. The stats
/// will be written in the appropriate order for each PAL client.
/// @param [in] stride Stride in bytes between subsequent query result data or zero to request tightly
/// packed result data.
///
/// @returns Success if query results were successfully returned in pData, or NotReady if any of the requested query
/// slots does not yet have results available. Otherwise, one of the following error codes may be
/// returned:
/// + ErrorInvalidValue if the range defined by startQuery and queryCount is not valid for this query pool.
/// + ErrorGpuMemoryNotBound if the query pool requires GPU memory but none is bound.
/// + ErrorInvalidMemorySize if pData is non-null and the value stored in pDataSize is too small.
virtual Result GetResults(
QueryResultFlags flags,
QueryType queryType,
uint32 startQuery,
uint32 queryCount,
const void* pMappedGpuAddr,
size_t* pDataSize,
void* pData,
size_t stride) = 0;
/// Use CPU to reset the query pool slots.
///
/// Supported for occlusion and video decode statistics query pools.
///
/// @note NOTE(review): @ref QueryPoolType in this header declares no video decode statistics pool type, so the
/// sentence above may be stale - confirm which pool types support a CPU reset.
///
/// @param [in] startQuery First query pool slot to reset.
/// @param [in] queryCount Number of query pool slots to reset.
/// @param [in] pMappedCpuAddr Specify the query buffer mapped address. If the parameter equals nullptr,
/// this method will use Map/UnMap to access the data.
///
/// @returns Success if the reset was successfully performed.
virtual Result Reset(
uint32 startQuery,
uint32 queryCount,
void* pMappedCpuAddr) = 0;
/// Returns the distance, in bytes, between successive query slots in the bound GPU memory.
/// This method is only supported for @ref QueryPoolType::VideoDecodeStats
///
/// @note NOTE(review): QueryPoolType as declared in this header has no VideoDecodeStats enumerator, so the @ref
/// above cannot resolve - confirm which pool types actually support this method.
///
/// @returns the distance, in bytes, between successive query slots in the bound GPU memory.
virtual gpusize GetQuerySlotStride() const = 0;
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data. This is nullptr until SetClientData() has been called, because the constructor
/// initializes the pointer to nullptr.
void* GetClientData() const
{
return m_pClientData;
}
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData A pointer to arbitrary client data. The pointer value is stored as-is and is handed
/// back unchanged by GetClientData().
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method. The client data pointer starts out as nullptr.
IQueryPool() : m_pClientData(nullptr) {}
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IQueryPool() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};
} // Pal
File diff ditekan karena terlalu besar Load Diff
@@ -1,275 +1,275 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palQueueSemaphore.h
* @brief Defines the Platform Abstraction Library (PAL) IQueueSemaphore interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
#include <chrono>
#if defined(_WIN32)
struct _SECURITY_ATTRIBUTES;
#endif
namespace Pal
{
// Forward declarations.
class IQueueSemaphore;
/// Specifies properties for @ref IQueueSemaphore creation. Input structure to IDevice::CreateQueueSemaphore().
struct QueueSemaphoreCreateInfo
{
/// Creation flags. The named bit-fields (seven 1-bit flags + 25 reserved bits = 32 bits) overlay u32All, so all
/// flags can be read or written at once through u32All.
union
{
struct
{
/// This queue semaphore may be opened for use by a different device.
/// For DX12 native fence, the flag needs to be consistent with D3DDDI_SYNCHRONIZATIONOBJECT_FLAGS.Shared
/// given by DX runtime.
uint32 shareable : 1;
/// This queue semaphore can only be shared through Nt handle.
/// For DX12 native fence, the flag needs to be consistent with
/// D3DDDI_SYNCHRONIZATIONOBJECT_FLAGS.NtSecuritySharing given by DX runtime.
uint32 sharedViaNtHandle : 1;
uint32 externalOpened : 1; ///< Semaphore was created by other APIs.
/// This queue semaphore is a timeline semaphore. Timeline semaphores have a 64-bit unsigned integer payload
/// which gets monotonically increased with each Signal operation. A wait on a timeline semaphore blocks the
/// waiter until the specified payload value has been signaled.
/// For DX12 native fence, runtime determines initialCount. Therefore, timeline flag has to be set.
uint32 timeline : 1;
/// Do not signal the queue semaphore to max if the device is lost.
/// For DX12 native fence, the flag needs to be consistent with
/// D3DDDI_SYNCHRONIZATIONOBJECT_FLAGS.NoSignalMaxValueOnTdr given by DX runtime.
uint32 noSignalOnDeviceLost : 1;
/// For native fence only. If it's 0x0, the native fence type is D3DDDI_NATIVEFENCE_TYPE_DEFAULT.
/// If it's 0x1, native fence type is D3DDDI_NATIVEFENCE_TYPE_INTRA_GPU.
/// For DX12, the value is determined by runtime. DXCP needs to set it by reading D3DDDI_NATIVEFENCEINFO.
uint32 gpuOnly : 1;
/// This queue semaphore will be a monitored fence if this flag is set, even if the OS supports native fence.
uint32 forceUseMonitoredFence : 1;
uint32 reserved : 25; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Queue semaphore creation flags.
uint32 maxCount; ///< The maximum signal count; once reached, further signals are dropped. Must be
/// non-zero and no more than maxSemaphoreCount in @ref DeviceProperties. For
/// example, a value of one would request a binary semaphore.
/// NOTE: maxCount does not apply to timeline semaphores.
uint64 initialCount; ///< Initial value for timeline semaphores. (or)
/// Initial count value for counting semaphores.
/// Must not be larger than maxCount for counting semaphores.
/// For DX12 native fence, DXCP needs to pass InitialFenceValue from
/// D3DDDI_NATIVEFENCEINFO.
};
/// Specifies parameters for opening a queue semaphore for use on another device. Input structure to
/// IDevice::OpenSharedQueueSemaphore().
///
/// The structure carries a single pointer to the semaphore being shared; it holds no flags or other options.
struct QueueSemaphoreOpenInfo
{
/// Shared queue semaphore object from another device to be opened.
IQueueSemaphore* pSharedQueueSemaphore;
};
/// Specifies parameters for opening a queue semaphore created by other APIs such as D3D.
struct ExternalQueueSemaphoreOpenInfo
{
/// Open flags. The named bit-fields (four 1-bit flags + 28 reserved bits = 32 bits) overlay u32All, so all flags
/// can be read or written at once through u32All.
union
{
struct
{
uint32 crossProcess : 1; ///< This semaphore is created in another process.
uint32 sharedViaNtHandle : 1; ///< The shared semaphore handle is NT handle.
uint32 isReference : 1; ///< If set, then the opened semaphore will reference the same sync
///< object in the kernel. Otherwise, the object is copied to the
///< new Semaphore.
/// This queue semaphore is a timeline semaphore. Timeline semaphores have a 64-bit unsigned integer payload
/// which gets monotonically increased with each Signal operation. A wait on a timeline semaphore blocks the
/// waiter until the specified payload value has been signaled.
uint32 timeline : 1;
uint32 reserved : 28; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< External queue semaphore open flags.
OsExternalHandle externalSemaphore; ///< External shared semaphore handle.
// syncFdSignalValue only exists for client interface versions below 882, and then only when building for
// __unix__ with PAL_KMT_BUILD enabled; in every other configuration the member is compiled out.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 882
#if defined(__unix__) && PAL_KMT_BUILD
uint64 syncFdSignalValue; ///< Signal timeline value when importing the state of a sync file
#endif
#endif
};
/// Specifies parameters for exporting a queue semaphore. Input structure to IQueueSemaphore::ExportExternalHandle().
struct QueueSemaphoreExportInfo
{
/// Export flags. The named bit-fields (1 + 31 = 32 bits) overlay u32All, so all flags can be read or written at
/// once through u32All.
union
{
struct
{
uint32 isReference : 1; ///< If set, then the semaphore exports a handle that references the
///< same sync object in the kernel. Otherwise, the object is copied
///< to the new Semaphore.
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< External queue semaphore export flags.
// The members below only exist when PAL_KMT_BUILD is enabled; syncFdWaitValue additionally requires __unix__.
#if PAL_KMT_BUILD
const _SECURITY_ATTRIBUTES* pSecurityAttributes; ///< It specifies the security descriptor and the inheritable
/// attribute.
const wchar_t* pNtObjectName; ///< A name for the NT handle. If the object is exported as a
/// named NT handle, the handle can then be acquired via this
/// name.
uint32 accessFlags; ///< Desired access rights of GPU memory.
/// NOTE(review): the wording says "GPU memory" although this
/// struct describes a semaphore export - confirm.
#if defined(__unix__)
uint64 syncFdWaitValue; ///< Wait timeline value when exporting the state of a sync file
#endif
#endif
};
/**
***********************************************************************************************************************
* @interface IQueueSemaphore
* @brief Semaphore object used to synchronize GPU work performed by multiple, parallel queues.
*
* These semaphores are used by calling IQueue::SignalQueueSemaphore() and IQueue::WaitQueueSemaphore().
*
* @see IDevice::CreateQueueSemaphore()
* @see IDevice::OpenSharedQueueSemaphore()
***********************************************************************************************************************
*/
class IQueueSemaphore : public IDestroyable
{
public:
/// An IQueue::WaitQueueSemaphore operation may need to be sent down to the OS after the corresponding
/// IQueue::SignalQueueSemaphore operation due to GPU scheduler limitations. This method checks if any queues have
/// batched-up commands waiting for a SignalQueueSemaphore operation to appear.
///
/// @returns True if one or more queues have some number of commands batched-up waiting for other queues to signal
/// this semaphore. False otherwise.
virtual bool HasStalledQueues() = 0;
/// Queries the current payload of a timeline semaphore.
///
/// @param [out] pValue Receives the payload returned by the query.
///
/// @returns Success if the timeline semaphore was queried successfully. Otherwise, one of the following errors may
/// be returned:
/// + ErrorInvalidValue if an unexpected conversion error occurs.
/// + ErrorInvalidObjectType if the semaphore is not a timeline semaphore.
virtual Result QuerySemaphoreValue(
uint64* pValue) = 0;
/// Waits on a timeline semaphore point. To be clear, this is a CPU wait.
///
/// @param [in] value The timeline point to wait for.
/// @param [in] timeout The maximum time to wait, in nanoseconds.
///
/// @returns Success if the timeline semaphore point was waited on successfully. Otherwise, one of the following
/// errors may be returned:
/// + ErrorInvalidValue if an unexpected conversion error occurs.
/// + ErrorInvalidObjectType if the semaphore is not a timeline semaphore.
virtual Result WaitSemaphoreValue(
uint64 value,
std::chrono::nanoseconds timeout) = 0;
/// Signals a timeline semaphore point. To be clear, this is a CPU signal.
///
/// @param [in] value The timeline point to signal.
///
/// @returns Success if the timeline semaphore point was signaled successfully. Otherwise, one of the following
/// errors may be returned:
/// + ErrorInvalidValue if an unexpected conversion error occurs.
/// + ErrorInvalidObjectType if the semaphore is not a timeline semaphore.
virtual Result SignalSemaphoreValue(
uint64 value) = 0;
#if PAL_KMT_BUILD || PAL_AMDGPU_BUILD
/// Returns an OS-specific handle which can be used to refer to this semaphore object across processes. This will
/// return a null or invalid handle if the object was not created with the external create flag set.
///
/// @param [in] exportInfo Information describing how the semaphore handle should be exported.
/// @note This function is only declared when PAL_KMT_BUILD or PAL_AMDGPU_BUILD is set.
/// NOTE(review): this note used to read "only available for Linux builds", which does not match the guard
/// above - confirm the intended platforms.
///
/// @returns An OS-specific handle which can be used to access the semaphore object across processes.
virtual OsExternalHandle ExportExternalHandle(
const QueueSemaphoreExportInfo& exportInfo) const = 0;
#endif
#if defined(_WIN32)
/// Returns an OS-specific handle which can be used by another device to access the semaphore object.
/// Only declared when _WIN32 is defined.
///
/// @returns An OS-specific handle which can be used by another device to access the semaphore object.
virtual OsExternalHandle ExportKmtHandle() const = 0;
#endif
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data; nullptr until SetClientData() has been called with another value.
void* GetClientData() const
{
return m_pClientData;
}
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData A pointer to arbitrary client data. It is stored as-is and handed back by
/// GetClientData().
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method. Initializes the client data pointer to nullptr.
IQueueSemaphore() : m_pClientData(nullptr) {}
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IQueueSemaphore() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palQueueSemaphore.h
* @brief Defines the Platform Abstraction Library (PAL) IQueueSemaphore interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
#include <chrono>
#if defined(_WIN32)
struct _SECURITY_ATTRIBUTES;
#endif
namespace Pal
{
// Forward declarations.
class IQueueSemaphore;
/// Specifies properties for @ref IQueueSemaphore creation. Input structure to IDevice::CreateQueueSemaphore().
struct QueueSemaphoreCreateInfo
{
union
{
struct
{
/// This queue semaphore may be opened for use by a different device.
/// For DX12 native fence, the flag needs to be consistent with D3DDDI_SYNCHRONIZATIONOBJECT_FLAGS.Shared
/// given by DX runtime.
uint32 shareable : 1;
/// This queue semaphore can only be shared through an NT handle.
/// For DX12 native fence, the flag needs to be consistent with
/// D3DDDI_SYNCHRONIZATIONOBJECT_FLAGS.NtSecuritySharing given by DX runtime.
uint32 sharedViaNtHandle : 1;
uint32 externalOpened : 1; ///< Semaphore was created by other APIs.
/// This queue semaphore is a timeline semaphore. Timeline semaphores have a 64-bit unsigned integer payload
/// which gets monotonically increased with each Signal operation. A wait on a timeline semaphore blocks the
/// waiter until the specified payload value has been signaled.
/// For DX12 native fence, runtime determines initialCount. Therefore, timeline flag has to be set.
uint32 timeline : 1;
/// Do not signal the queue semaphore to max if the device is lost.
/// For DX12 native fence, the flag needs to be consistent with
/// D3DDDI_SYNCHRONIZATIONOBJECT_FLAGS.NoSignalMaxValueOnTdr given by DX runtime.
uint32 noSignalOnDeviceLost : 1;
/// For native fence only. If it's 0x0, the native fence type is D3DDDI_NATIVEFENCE_TYPE_DEFAULT.
/// If it's 0x1, native fence type is D3DDDI_NATIVEFENCE_TYPE_INTRA_GPU.
/// For DX12, the value is determined by runtime. DXCP needs to set it by reading D3DDDI_NATIVEFENCEINFO.
uint32 gpuOnly : 1;
/// This queue semaphore will be a monitored fence if this flag is set, even if the OS supports native
/// fences.
uint32 forceUseMonitoredFence : 1;
uint32 reserved : 25; ///< Reserved for future use. Pads the seven flags above to 32 bits.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< Queue semaphore creation flags.
uint32 maxCount; ///< The maximum signal count; once reached, further signals are dropped. Must be
/// non-zero and no more than maxSemaphoreCount in @ref DeviceProperties. For
/// example, a value of one would request a binary semaphore.
/// NOTE: maxCount does not apply to timeline semaphores.
uint64 initialCount; ///< Initial payload value for timeline semaphores, or the initial count value for
/// counting semaphores.
/// Must not be larger than maxCount for counting semaphores.
/// For DX12 native fence, DXCP needs to pass InitialFenceValue from
/// D3DDDI_NATIVEFENCEINFO.
};
/// Specifies parameters for opening a queue semaphore for use on another device. Input structure to
/// IDevice::OpenSharedQueueSemaphore().
struct QueueSemaphoreOpenInfo
{
/// The shared queue semaphore object, belonging to another device, that is to be opened.
IQueueSemaphore* pSharedQueueSemaphore;
};
/// Specifies parameters for opening a queue semaphore created by other APIs such as D3D.
struct ExternalQueueSemaphoreOpenInfo
{
union
{
struct
{
uint32 crossProcess : 1; ///< This semaphore was created in another process.
uint32 sharedViaNtHandle : 1; ///< The shared semaphore handle is an NT handle.
uint32 isReference : 1; ///< If set, then the opened semaphore will reference the same sync
///< object in the kernel. Otherwise, the object is copied to the
///< new semaphore.
/// This queue semaphore is a timeline semaphore. Timeline semaphores have a 64-bit unsigned integer payload
/// which gets monotonically increased with each Signal operation. A wait on a timeline semaphore blocks the
/// waiter until the specified payload value has been signaled.
uint32 timeline : 1;
uint32 reserved : 28; ///< Reserved for future use. Pads the four flags above to 32 bits.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< External queue semaphore open flags.
OsExternalHandle externalSemaphore; ///< External shared semaphore handle.
// syncFdSignalValue is only declared for client interface versions below 882, and then only when __unix__ is
// defined and PAL_KMT_BUILD is set.
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 882
#if defined(__unix__) && PAL_KMT_BUILD
uint64 syncFdSignalValue; ///< Signal timeline value when importing the state of a sync file.
#endif
#endif
};
/// Specifies parameters for exporting a queue semaphore. Input structure to IQueueSemaphore::ExportExternalHandle().
struct QueueSemaphoreExportInfo
{
union
{
struct
{
uint32 isReference : 1; ///< If set, then the exported handle references the same sync object
///< in the kernel. Otherwise, the object is copied to the new
///< semaphore.
uint32 reserved : 31; ///< Reserved for future use.
};
uint32 u32All; ///< Flags packed as 32-bit uint.
} flags; ///< External queue semaphore export flags.
// The members below are only declared when PAL_KMT_BUILD is set; syncFdWaitValue additionally requires __unix__.
#if PAL_KMT_BUILD
const _SECURITY_ATTRIBUTES* pSecurityAttributes; ///< Specifies the security descriptor and the inheritable
/// attribute.
const wchar_t* pNtObjectName; ///< Name for the NT handle. If the object is exported as a
/// named NT handle, the handle can then be acquired via
/// this name.
uint32 accessFlags; ///< Desired access rights of GPU memory.
/// NOTE(review): "GPU memory" looks copied from the GPU
/// memory export path; presumably these are the access
/// rights of the exported semaphore handle - confirm.
#if defined(__unix__)
uint64 syncFdWaitValue; ///< Wait timeline value when exporting the state of a sync file.
#endif
#endif
};
/**
***********************************************************************************************************************
* @interface IQueueSemaphore
* @brief Semaphore object used to synchronize GPU work performed by multiple, parallel queues.
*
* These semaphores are used by calling IQueue::SignalQueueSemaphore() and IQueue::WaitQueueSemaphore().
*
* @see IDevice::CreateQueueSemaphore()
* @see IDevice::OpenSharedQueueSemaphore()
***********************************************************************************************************************
*/
class IQueueSemaphore : public IDestroyable
{
public:
/// An IQueue::WaitQueueSemaphore operation may need to be sent down to the OS after the corresponding
/// IQueue::SignalQueueSemaphore operation due to GPU scheduler limitations. This method checks if any queues have
/// batched-up commands waiting for a SignalQueueSemaphore operation to appear.
///
/// @returns True if one or more queues have some number of commands batched-up waiting for other queues to signal
/// this semaphore. False otherwise.
virtual bool HasStalledQueues() = 0;
/// Queries the current payload of a timeline semaphore.
///
/// @param [out] pValue Receives the payload returned by the query.
///
/// @returns Success if the timeline semaphore was queried successfully. Otherwise, one of the following errors may
/// be returned:
/// + ErrorInvalidValue if an unexpected conversion error occurs.
/// + ErrorInvalidObjectType if the semaphore is not a timeline semaphore.
virtual Result QuerySemaphoreValue(
uint64* pValue) = 0;
/// Waits on a timeline semaphore point. To be clear, this is a CPU wait.
///
/// @param [in] value The timeline point to wait for.
/// @param [in] timeout The maximum time to wait, in nanoseconds.
///
/// @returns Success if the timeline semaphore point was waited on successfully. Otherwise, one of the following
/// errors may be returned:
/// + ErrorInvalidValue if an unexpected conversion error occurs.
/// + ErrorInvalidObjectType if the semaphore is not a timeline semaphore.
virtual Result WaitSemaphoreValue(
uint64 value,
std::chrono::nanoseconds timeout) = 0;
/// Signals a timeline semaphore point. To be clear, this is a CPU signal.
///
/// @param [in] value The timeline point to signal.
///
/// @returns Success if the timeline semaphore point was signaled successfully. Otherwise, one of the following
/// errors may be returned:
/// + ErrorInvalidValue if an unexpected conversion error occurs.
/// + ErrorInvalidObjectType if the semaphore is not a timeline semaphore.
virtual Result SignalSemaphoreValue(
uint64 value) = 0;
#if PAL_KMT_BUILD || PAL_AMDGPU_BUILD
/// Returns an OS-specific handle which can be used to refer to this semaphore object across processes. This will
/// return a null or invalid handle if the object was not created with the external create flag set.
///
/// @param [in] exportInfo Information describing how the semaphore handle should be exported.
/// @note This function is only declared when PAL_KMT_BUILD or PAL_AMDGPU_BUILD is set.
/// NOTE(review): this note used to read "only available for Linux builds", which does not match the guard
/// above - confirm the intended platforms.
///
/// @returns An OS-specific handle which can be used to access the semaphore object across processes.
virtual OsExternalHandle ExportExternalHandle(
const QueueSemaphoreExportInfo& exportInfo) const = 0;
#endif
#if defined(_WIN32)
/// Returns an OS-specific handle which can be used by another device to access the semaphore object.
/// Only declared when _WIN32 is defined.
///
/// @returns An OS-specific handle which can be used by another device to access the semaphore object.
virtual OsExternalHandle ExportKmtHandle() const = 0;
#endif
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data; nullptr until SetClientData() has been called with another value.
void* GetClientData() const
{
return m_pClientData;
}
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData A pointer to arbitrary client data. It is stored as-is and handed back by
/// GetClientData().
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method. Initializes the client data pointer to nullptr.
IQueueSemaphore() : m_pClientData(nullptr) {}
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IQueueSemaphore() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
};
} // Pal
@@ -1,251 +1,253 @@
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palShaderLibrary.h
* @brief Defines the Platform Abstraction Library (PAL) IShaderLibrary interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
#include "palStringView.h"
#include "palSpan.h"
namespace Pal
{
struct GpuMemSubAllocInfo;
/// Common flags controlling creation of shader libraries.
union LibraryCreateFlags
{
struct
{
uint32 clientInternal : 1; ///< Internal library not created by the application.
uint32 isGraphics : 1; ///< Whether it is a graphics library.
uint32 reserved : 30; ///< Reserved for future use.
};
uint32 u32All; ///< All flags packed as a single 32-bit uint; overlays the bitfields above.
};
/// Specifies properties about an indirect function belonging to a @ref IShaderLibrary object. Part of the input
/// structure to IDevice::CreateShaderLibrary().
struct ShaderLibraryFunctionInfo
{
Util::StringView<char> symbolName; ///< ELF symbol name of the associated function.
gpusize gpuVirtAddr; ///< [out] GPU virtual address of the function. This is computed by PAL during
/// library creation.
};
/// Specifies a shader sub type / ShaderKind.
enum class ShaderSubType : uint32
{
Unknown = 0, ///< Explicitly zero; the enumerators below take consecutive values after it.
Traversal,
RayGeneration,
Intersection,
AnyHit,
ClosestHit,
Miss,
Callable,
LaunchKernel, ///< Raytracing launch kernel
Count ///< Last enumerator; presumably the number of sub types rather than a real sub type - confirm.
};
/// Specifies properties for creation of a compute @ref IShaderLibrary object. Input structure to
/// IDevice::CreateShaderLibrary().
struct ShaderLibraryCreateInfo
{
LibraryCreateFlags flags; ///< Library creation flags.
const void* pCodeObject; ///< Pointer to the code-object ELF binary implementing the Pipeline ABI interface.
/// The code-object ELF contains pre-compiled shaders, register values, and
/// additional metadata.
size_t codeObjectSize; ///< Size of the code object pointed to by pCodeObject, in bytes.
};
/// Reports properties of a compiled library.
struct LibraryInfo
{
PipelineHash internalLibraryHash; ///< 128-bit identifier extracted from this library's ELF binary, composed of
/// the state the compiler decided was appropriate to identify the compiled
/// library. The lower 64 bits are "stable"; the upper 64 bits are "unique".
/// NOTE(review): the exact meaning of "stable" vs. "unique" is not defined
/// here - see the PipelineHash documentation.
};
/// Reports shader stats. Multiple bits set in the shader stage mask indicates that multiple shaders have been combined
/// due to HW support. The same information will be repeated for both the constituent shaders in this case.
struct ShaderLibStats
{
ShaderHash shaderHash; ///< Shader hash.
CommonShaderStats common; ///< The shader compilation parameters for this shader.
/// Maximum number of VGPRs the compiler was allowed to use for this shader. This limit will be the minimum
/// of any architectural restriction and any client-requested limit intended to increase the number of waves in
/// flight.
uint32 numAvailableVgprs;
/// Maximum number of SGPRs the compiler was allowed to use for this shader. This limit will be the minimum
/// of any architectural restriction and any client-requested limit intended to increase the number of waves in
/// flight.
uint32 numAvailableSgprs;
size_t isaSizeInBytes; ///< Size of the shader ISA disassembly for this shader, in bytes.
PipelineHash palInternalLibraryHash; ///< Internal hash of the shader compilation data used by PAL.
uint32 stackFrameSizeInBytes; ///< Shader function stack frame size, in bytes.
ShaderSubType shaderSubType; ///< ShaderSubType / shader kind of this shader.
CompilerStackSizes cpsStackSizes; ///< Stack used in Continuation.
/// NOTE(review): presumably the stack sizes for continuation-passing
/// shaders - confirm against CompilerStackSizes.
};
/**
***********************************************************************************************************************
* @interface IShaderLibrary
* @brief Object containing one or more shader functions stored in GPU memory. These shader functions are callable
* from the shaders contained within IPipeline objects.
*
* Before a pipeline which calls into this library is bound to a command buffer (using @ref ICmdBuffer::BindPipeline),
* the client must call @ref IPipeline::LinkWithLibraries() and specify this library in the list of linked libraries.
* Failure to comply with this requirement is an error and will result in undefined behavior.
*
* @see IDevice::CreateShaderLibrary()
* @see IPipeline::LinkWithLibraries()
***********************************************************************************************************************
*/
class IShaderLibrary : public IDestroyable
{
public:
/// Returns properties of this library and its corresponding shader functions.
///
/// @returns Property structure describing this library.
virtual const LibraryInfo& GetInfo() const = 0;
/// Returns a list of GPU memory allocations used by this library.
///
/// @param [in,out] pNumEntries Input value specifies the available size in pAllocInfoList; output value
/// reports the number of GPU memory allocations.
/// @param [out] pAllocInfoList If pAllocInfoList=nullptr, then pNumEntries is ignored on input. On output it
/// will reflect the number of allocations that make up this library. If
/// pAllocInfoList!=nullptr, then on input pNumEntries is assumed to be the number
/// of entries in the pAllocInfoList array. On output, pNumEntries reflects the
/// number of entries in pAllocInfoList that are valid.
/// @returns Success if the allocation info was successfully written to the buffer.
/// + ErrorInvalidValue if the caller provides a buffer size that is different from the size needed.
/// + ErrorInvalidPointer if pNumEntries is nullptr.
virtual Result QueryAllocationInfo(
size_t* pNumEntries,
GpuMemSubAllocInfo* const pAllocInfoList) const = 0;
/// Gives the client access to the resource ID used for internal PAL events.
/// EX: Resource Create, Resource Bind, Resource Destroy.
///
/// @returns The Resource ID.
virtual const void* GetResourceId() const = 0;
/// Obtains the binary code object for this library.
///
/// @param [in, out] pSize Represents the size of the library ELF (presumably in bytes - confirm).
///
/// @param [out] pBuffer If non-null, the library ELF is written in the buffer. If null, the size required
/// for the library ELF is given out in the location pSize.
///
/// @returns Success if the library binary was fetched successfully.
/// + ErrorUnavailable if the library binary was not fetched successfully.
virtual Result GetCodeObject(
uint32* pSize,
void* pBuffer) const = 0;
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data; nullptr until SetClientData() has been called with another value.
void* GetClientData() const { return m_pClientData; }
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData A pointer to arbitrary client data. It is stored as-is and handed back by
/// GetClientData().
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
/// Obtains the compiled shader ISA code for the shader function specified.
///
/// @param [in] shaderExportName The shader exported name.
///
/// @param [in, out] pSize Represents the size of the shader ISA code.
///
/// @param [out] pBuffer If non-null, the shader ISA code is written in the buffer. If null, the size required
/// for the shader ISA is given out in the location pSize.
///
/// @returns Success if the shader ISA code was fetched successfully.
/// + ErrorUnavailable if the shader ISA code was not fetched successfully.
virtual Result GetShaderFunctionCode(
Util::StringView<char> shaderExportName,
size_t* pSize,
void* pBuffer) const = 0;
/// Obtains the shader pre and post compilation stats/params for the specified shader function.
///
/// @param [in] shaderExportName The shader exported name.
///
/// @param [out] pShaderStats Pointer to the ShaderLibStats structure which will be filled with the shader stats
/// for the shader function named by shaderExportName. This cannot be nullptr.
///
/// @note NOTE(review): this comment used to document a getDisassemblySize parameter that is not part of this
/// signature; whether ShaderLibStats::isaSizeInBytes is always populated cannot be told from here - confirm.
///
/// @returns Success if the stats were successfully obtained for this shader, including the shader disassembly size.
/// + ErrorUnavailable if a wrong shader stage for this pipeline was specified, or if some internal error
/// occurred.
virtual Result GetShaderFunctionStats(
Util::StringView<char> shaderExportName,
ShaderLibStats* pShaderStats) const = 0;
/// Returns the function list owned by this shader library.
///
/// @returns A span over the ShaderLibraryFunctionInfo entries of this library.
virtual const Util::Span<const ShaderLibraryFunctionInfo> GetShaderLibFunctionInfos() const = 0;
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method. Initializes the client data pointer to nullptr.
IShaderLibrary() : m_pClientData(nullptr) { }
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IShaderLibrary() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
// Copy construction and copy assignment are explicitly deleted for this interface.
IShaderLibrary(const IShaderLibrary&) = delete;
IShaderLibrary& operator=(const IShaderLibrary&) = delete;
};
} // Pal
/*
***********************************************************************************************************************
*
* Copyright (c) 2014-2025 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
**********************************************************************************************************************/
/**
***********************************************************************************************************************
* @file palShaderLibrary.h
* @brief Defines the Platform Abstraction Library (PAL) IShaderLibrary interface and related types.
***********************************************************************************************************************
*/
#pragma once
#include "pal.h"
#include "palDestroyable.h"
#include "palStringView.h"
#include "palSpan.h"
namespace Pal
{
struct GpuMemSubAllocInfo;
/// Common flags controlling creation of shader libraries.
union LibraryCreateFlags
{
struct
{
uint32 clientInternal : 1; ///< Internal library not created by the application.
uint32 isGraphics : 1; ///< Whether it is a graphics library.
uint32 reserved : 30; ///< Reserved for future use.
};
uint32 u32All; ///< All flags packed as a single 32-bit uint; overlays the bitfields above.
};
/// Specifies properties about an indirect function belonging to a @ref IShaderLibrary object. Part of the input
/// structure to IDevice::CreateShaderLibrary().
struct ShaderLibraryFunctionInfo
{
Util::StringView<char> symbolName; ///< ELF symbol name of the associated function.
gpusize gpuVirtAddr; ///< [out] GPU virtual address of the function. This is computed by PAL during
/// library creation.
};
/// Specifies a shader sub type / ShaderKind.
enum class ShaderSubType : uint32
{
Unknown = 0, ///< Explicitly zero; the enumerators below take consecutive values after it.
Traversal,
RayGeneration,
Intersection,
AnyHit,
ClosestHit,
Miss,
Callable,
LaunchKernel, ///< Raytracing launch kernel
Count ///< Last enumerator; presumably the number of sub types rather than a real sub type - confirm.
};
/// Specifies properties for creation of a compute @ref IShaderLibrary object. Input structure to
/// IDevice::CreateShaderLibrary().
struct ShaderLibraryCreateInfo
{
LibraryCreateFlags flags; ///< Library creation flags.
const void* pCodeObject; ///< Pointer to the code-object ELF binary implementing the Pipeline ABI interface.
/// The code-object ELF contains pre-compiled shaders, register values, and
/// additional metadata.
size_t codeObjectSize; ///< Size of the code object pointed to by pCodeObject, in bytes.
};
/// Reports properties of a compiled library.
struct LibraryInfo
{
PipelineHash internalLibraryHash; ///< 128-bit identifier extracted from this library's ELF binary, composed of
/// the state the compiler decided was appropriate to identify the compiled
/// library. The lower 64 bits are "stable"; the upper 64 bits are "unique".
/// NOTE(review): the exact meaning of "stable" vs. "unique" is not defined
/// here - see the PipelineHash documentation.
Util::StringView<char> colorExports; ///< For a Graphics Partial Pipeline pixel shader, an opaque
/// string to pass to the compiler to build the color export shader.
/// NOTE(review): this is a non-owning view; the lifetime of the
/// underlying string is not visible here - confirm before storing it.
};
/// Reports shader stats. Multiple bits set in the shader stage mask indicates that multiple shaders have been combined
/// due to HW support. The same information will be repeated for both the constituent shaders in this case.
struct ShaderLibStats
{
ShaderHash shaderHash; ///< Shader hash.
CommonShaderStats common; ///< The shader compilation parameters for this shader.
/// Maximum number of VGPRs the compiler was allowed to use for this shader. This limit will be the minimum
/// of any architectural restriction and any client-requested limit intended to increase the number of waves in
/// flight.
uint32 numAvailableVgprs;
/// Maximum number of SGPRs the compiler was allowed to use for this shader. This limit will be the minimum
/// of any architectural restriction and any client-requested limit intended to increase the number of waves in
/// flight.
uint32 numAvailableSgprs;
size_t isaSizeInBytes; ///< Size of the shader ISA disassembly for this shader, in bytes.
PipelineHash palInternalLibraryHash; ///< Internal hash of the shader compilation data used by PAL.
uint32 stackFrameSizeInBytes; ///< Shader function stack frame size, in bytes.
ShaderSubType shaderSubType; ///< ShaderSubType / shader kind of this shader.
CompilerStackSizes cpsStackSizes; ///< Stack used in Continuation.
/// NOTE(review): presumably the stack sizes for continuation-passing
/// shaders - confirm against CompilerStackSizes.
};
/**
***********************************************************************************************************************
* @interface IShaderLibrary
* @brief Object containing one or more shader functions stored in GPU memory. These shader functions are callable
* from the shaders contained within IPipeline objects.
*
* Before a pipeline which calls into this library is bound to a command buffer (using @ref ICmdBuffer::BindPipeline),
* the client must call @ref IPipeline::LinkWithLibraries() and specify this library in the list of linked libraries.
* Failure to comply with this requirement is an error and will result in undefined behavior.
*
* @see IDevice::CreateShaderLibrary()
* @see IPipeline::LinkWithLibraries()
***********************************************************************************************************************
*/
class IShaderLibrary : public IDestroyable
{
public:
/// Returns properties of this library and its corresponding shader functions.
///
/// @returns Property structure describing this library.
virtual const LibraryInfo& GetInfo() const = 0;
/// Returns a list of GPU memory allocations used by this library.
///
/// @param [in,out] pNumEntries Input value specifies the available size in pAllocInfoList; output value
/// reports the number of GPU memory allocations.
/// @param [out] pAllocInfoList If pAllocInfoList=nullptr, then pNumEntries is ignored on input. On output it
/// will reflect the number of allocations that make up this library. If
/// pAllocInfoList!=nullptr, then on input pNumEntries is assumed to be the number
/// of entries in the pAllocInfoList array. On output, pNumEntries reflects the
/// number of entries in pAllocInfoList that are valid.
/// @returns Success if the allocation info was successfully written to the buffer.
/// + ErrorInvalidValue if the caller provides a buffer size that is different from the size needed.
/// + ErrorInvalidPointer if pNumEntries is nullptr.
virtual Result QueryAllocationInfo(
size_t* pNumEntries,
GpuMemSubAllocInfo* const pAllocInfoList) const = 0;
/// Gives the client access to the resource ID used for internal PAL events.
/// EX: Resource Create, Resource Bind, Resource Destroy.
///
/// @returns The Resource ID.
virtual const void* GetResourceId() const = 0;
/// Obtains the binary code object for this library.
///
/// @param [in, out] pSize Represents the size of the library ELF (see pBuffer).
/// NOTE(review): presumably expressed in bytes - confirm against the implementation.
///
/// @param [out] pBuffer If non-null, the library ELF is written in the buffer. If null, the size required
/// for the library ELF is given out in the location pSize.
///
/// @returns Success if the library binary was fetched successfully.
/// +ErrorUnavailable if the library binary was not fetched successfully.
virtual Result GetCodeObject(
uint32* pSize,
void* pBuffer) const = 0;
/// Returns the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @returns Pointer to client data.
void* GetClientData() const { return m_pClientData; }
/// Sets the value of the associated arbitrary client data pointer.
/// Can be used to associate arbitrary data with a particular PAL object.
///
/// @param [in] pClientData A pointer to arbitrary client data.
void SetClientData(
void* pClientData)
{
m_pClientData = pClientData;
}
/// Obtains the compiled shader ISA code for the shader function specified.
///
/// @param [in] shaderExportName The exported name of the shader function.
///
/// @param [in, out] pSize Represents the size of the shader ISA code.
///
/// @param [out] pBuffer If non-null, the shader ISA code is written in the buffer. If null, the size required
/// for the shader ISA is given out in the location pSize.
///
/// @returns Success if the shader ISA code was fetched successfully.
/// +ErrorUnavailable if the shader ISA code was not fetched successfully.
virtual Result GetShaderFunctionCode(
Util::StringView<char> shaderExportName,
size_t* pSize,
void* pBuffer) const = 0;
/// Obtains the shader pre and post compilation stats/params for the specified shader function.
///
/// @param [in] shaderExportName The exported name of the shader function.
///
/// @param [out] pShaderStats Pointer to the ShaderLibStats structure which will be filled with the shader stats
/// for the shader function named by shaderExportName. This cannot be nullptr.
///
/// NOTE(review): earlier documentation described a getDisassemblySize parameter, which is not part of this
/// signature. Whether the disassembly size is always reported should be confirmed against the implementation.
///
/// @returns Success if the stats were successfully obtained for this shader, including the shader disassembly size.
/// +ErrorUnavailable if a wrong shader was specified, or if some internal error occurred.
virtual Result GetShaderFunctionStats(
Util::StringView<char> shaderExportName,
ShaderLibStats* pShaderStats) const = 0;
/// Returns the function list owned by this shader library.
///
/// @returns A list of ShaderLibraryFunctionInfo.
virtual const Util::Span<const ShaderLibraryFunctionInfo> GetShaderLibFunctionInfos() const = 0;
protected:
/// @internal Constructor. Prevent use of new operator on this interface. Client must create objects by explicitly
/// calling the proper create method. The client data pointer starts out as nullptr.
IShaderLibrary() : m_pClientData(nullptr) { }
/// @internal Destructor. Prevent use of delete operator on this interface. Client must destroy objects by
/// explicitly calling IDestroyable::Destroy() and is responsible for freeing the system memory allocated for the
/// object on their own.
virtual ~IShaderLibrary() { }
private:
/// @internal Client data pointer. This can have an arbitrary value and can be returned by calling GetClientData()
/// and set via SetClientData().
/// For non-top-layer objects, this will point to the layer above the current object.
void* m_pClientData;
/// @internal Copy construction and copy assignment are deleted; objects of this interface cannot be copied.
IShaderLibrary(const IShaderLibrary&) = delete;
IShaderLibrary& operator=(const IShaderLibrary&) = delete;
};
} // Pal