ファイル
rocm-systems/projects/clr/rocclr/device/pal/paldevice.hpp
T
2025-10-23 11:21:49 -07:00

788 行
30 KiB
C++

/* Copyright (c) 2015 - 2023 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#include "top.hpp"
#include "device/device.hpp"
#include "platform/command.hpp"
#include "platform/program.hpp"
#include "platform/perfctr.hpp"
#include "platform/threadtrace.hpp"
#include "platform/memory.hpp"
#include "platform/runtime.hpp"
#include "utils/concurrent.hpp"
#include "thread/thread.hpp"
#include "thread/monitor.hpp"
#include "device/pal/palvirtual.hpp"
#include "device/pal/palmemory.hpp"
#include "device/pal/paldefs.hpp"
#include "device/pal/palsettings.hpp"
#include "device/pal/palappprofile.hpp"
#include "device/pal/palcapturemgr.hpp"
#include "device/pal/palsignal.hpp"
#include "memory"
#include <atomic>
#include <unordered_set>
#if defined(__clang__)
#if __has_feature(address_sanitizer)
#include "device/devurilocator.hpp"
#endif
#endif
/*! \addtogroup PAL
* @{
*/
//! PAL Device Implementation
namespace amd::pal {
//! A nil device object
class NullDevice : public amd::Device {
public:
static bool init(void);
//! Construct a new identifier
NullDevice();
//! Creates an offline device with the specified target
bool create(const char* palName, //!< Device name
const amd::Isa& isa, //!< Device ISA
Pal::GfxIpLevel ipLevel, //!< GPU ip level
Pal::AsicRevision asicRevision //!< PAL ASIC revision
);
//! Instantiate a new virtual device
virtual device::VirtualDevice* createVirtualDevice(amd::CommandQueue* queue = nullptr) {
return nullptr;
}
//! Compile the given source code.
virtual device::Program* createProgram(amd::Program& owner,
amd::option::Options* options = nullptr);
//! Just returns NULL for the dummy device
virtual device::Memory* createMemory(amd::Memory& owner) const { return nullptr; }
//! Just returns NULL for the dummy device
virtual device::Memory* createMemory(size_t size, size_t alignment = 0) const { return nullptr; }
//! Sampler object allocation
virtual bool createSampler(const amd::Sampler& owner, //!< abstraction layer sampler object
device::Sampler** sampler //!< device sampler object
) const {
ShouldNotReachHere();
return true;
}
//! Just returns NULL for the dummy device
virtual device::Memory* createView(
amd::Memory& owner, //!< Owner memory object
const device::Memory& parent //!< Parent device memory object for the view
) const {
return nullptr;
}
//! Signal object allocation
virtual device::Signal* createSignal() const { return nullptr; }
//! Acquire external graphics API object in the host thread
//! Needed for OpenGL objects on CPU device
virtual bool bindExternalDevice(uint flags, void* const pDevice[], void* pContext,
bool validateOnly) {
return true;
}
virtual bool unbindExternalDevice(uint flags, void* const pDevice[], void* pContext,
bool validateOnly) {
return true;
}
//! Releases non-blocking map target memory
virtual void freeMapTarget(amd::Memory& mem, void* target) {}
Pal::GfxIpLevel ipLevel() const { return ipLevel_; }
Pal::AsicRevision asicRevision() const { return asicRevision_; }
//! Empty implementation on Null device
virtual bool globalFreeMemory(size_t* freeMemory) const { return false; }
//! Empty implementation on Null device
virtual bool amdFileRead(amd::Os::FileDesc handle, void* devicePtr, uint64_t size, int64_t file_offset,
uint64_t* size_copied, int32_t* status) {
ShouldNotReachHere();
return false;
}
//! Empty implementation on Null device
virtual bool amdFileWrite(amd::Os::FileDesc handle, void* devicePtr, uint64_t size, int64_t file_offset,
uint64_t* size_copied, int32_t* status) {
ShouldNotReachHere();
return false;
}
//! Get GPU device settings
const pal::Settings& settings() const { return reinterpret_cast<pal::Settings&>(*settings_); }
virtual void* svmAlloc(amd::Context& context, size_t size, size_t alignment,
cl_svm_mem_flags flags, void* svmPtr) const {
return NULL;
}
virtual void svmFree(void* ptr) const { return; }
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment) { return nullptr; };
virtual bool virtualFree(void* addr) { return true; }
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags,
VmmLocationType = VmmLocationType::kDevice) {
return true;
}
virtual bool GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr) const { return true; }
virtual bool ValidateMemAccess(amd::Memory& mem, bool read_write) const { return true; }
virtual bool ExportShareableVMMHandle(amd::Memory& amd_mem_obj, int flags,
void* shareableHandle) {
return false;
}
virtual amd::Memory* ImportShareableVMMHandle(void* osHandle) { return nullptr; }
virtual bool importExtSemaphore(void** extSemaphore, const amd::Os::FileDesc& handle,
amd::ExternalSemaphoreHandleType sem_handle_type) override {
return false;
}
virtual void DestroyExtSemaphore(void* extSemaphore) {}
void* Alloc(const Util::AllocInfo& allocInfo) { return allocator_.Alloc(allocInfo); }
void Free(const Util::FreeInfo& freeInfo) { allocator_.Free(freeInfo); }
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
cl_set_device_clock_mode_output_amd* pSetClockModeOutput) {
return true;
}
#if defined(__clang__)
#if __has_feature(address_sanitizer)
virtual device::UriLocator* createUriLocator() const { return nullptr; }
#endif
#endif
protected:
static Util::GenericAllocator allocator_; //!< Generic memory allocator in PAL
Pal::AsicRevision asicRevision_; //!< ASIC revision
Pal::GfxIpLevel ipLevel_; //!< Device IP level
const char* palName_; //!< Device name
//! Fills OpenCL device info structure
void fillDeviceInfo(const Pal::DeviceProperties& palProp, //!< PAL device properties
const Pal::GpuMemoryHeapProperties heaps[Pal::GpuHeapCount],
size_t maxTextureSize, //!< Maximum texture size supported in HW
uint numComputeRings, //!< Number of compute rings
uint numExclusiveComputeRings, //!< Number of exclusive compute rings
Pal::IDevice* pal_device //!< PAL device for which info is filled
);
};
//! Forward declarations
class Command;
class Device;
class GpuCommand;
class Heap;
class HeapBlock;
class Program;
class Kernel;
class Memory;
class Resource;
class GpuMemoryReference;
class VirtualDevice;
class PrintfDbg;
class ThreadTrace;
#ifndef CL_FILTER_NONE
#define CL_FILTER_NONE 0x1142
#endif
enum class ExclusiveQueueType : uint32_t { RealTime0 = 0, RealTime1, Medium };
class Sampler : public device::Sampler {
public:
//! Constructor
Sampler(const Device& dev) : dev_(dev) {}
//! Default destructor for the device memory object
virtual ~Sampler();
//! Creates a device sampler from the OCL sampler state
bool create(uint32_t oclSamplerState, //!< OCL sampler state
const uint addressMode[3] //!< Address modes in X, Y and Z
);
//! Creates a device sampler from the OCL sampler state
bool create(const amd::Sampler& owner //!< AMD sampler object
);
private:
//! Disable default copy constructor
Sampler& operator=(const Sampler&);
//! Disable operator=
Sampler(const Sampler&);
const Device& dev_; //!< Device object associated with the sampler
};
//! A GPU device ordinal (physical GPU device)
class Device : public NullDevice {
public:
struct QueueRecycleInfo : public amd::HeapObject {
int counter_; //!< Lock usage counter
Pal::EngineType engineType_; //!< Engine type
uint32_t index_; //!< HW queue index for scratch buffer access
amd::Monitor queue_lock_; //!< Queue lock for access
AqlPacketMgmt aql_packet_mgmt_; //!< AQL packets management class for debugger support
QueueRecycleInfo()
: counter_(1),
engineType_(Pal::EngineTypeCompute),
index_(0),
queue_lock_(true) /* Queue lock for sharing */ {}
//! Returns the aql packet list
uintptr_t AqlPacketList() const {
return reinterpret_cast<uintptr_t>(&aql_packet_mgmt_.aql_packets_);
}
};
//! Locks any access to the virtual GPUs
class ScopedLockVgpus : public amd::StackObject {
public:
//! Default constructor
ScopedLockVgpus(const Device& dev);
//! Destructor
~ScopedLockVgpus();
private:
const Device& dev_; //! Device object
};
//! Transfer buffers
class XferBuffers : public amd::HeapObject {
public:
static constexpr size_t MaxXferBufListSize = 8;
//! Default constructor
XferBuffers(const Device& device, Resource::MemoryType type, size_t bufSize)
: type_(type), bufSize_(bufSize), acquiredCnt_(0), gpuDevice_(device) {}
//! Default destructor
~XferBuffers();
//! Creates the xfer buffers object
bool create();
//! Acquires an instance of the transfer buffers
Memory& acquire();
//! Releases transfer buffer
void release(VirtualGPU& gpu, //!< Virual GPU object used with the buffer
Memory& buffer //!< Transfer buffer for release
);
//! Returns the buffer's size for transfer
size_t bufSize() const { return bufSize_; }
private:
//! Disable copy constructor
XferBuffers(const XferBuffers&);
//! Disable assignment operator
XferBuffers& operator=(const XferBuffers&);
//! Get device object
const Device& dev() const { return gpuDevice_; }
Resource::MemoryType type_; //!< The buffer's type
size_t bufSize_; //!< Staged buffer size
std::list<Memory*> freeBuffers_; //!< The list of free buffers
std::atomic<uint> acquiredCnt_; //!< The total number of acquired buffers
amd::Monitor lock_; //!< Stgaed buffer acquire/release lock
const Device& gpuDevice_; //!< GPU device object
};
struct ScratchBuffer : public amd::HeapObject {
Memory* memObj_; //!< Memory objects for scratch buffers
uint64_t offset_; //!< Offset from the global scratch store
uint64_t size_; //!< Scratch buffer size on this queue
//! Default constructor
ScratchBuffer() : memObj_(nullptr), offset_(0), size_(0) {}
//! Default constructor
~ScratchBuffer();
//! Destroys memory objects
void destroyMemory();
};
class SrdManager : public amd::HeapObject {
public:
SrdManager(const Device& dev, uint srdSize, uint bufSize)
: dev_(dev),
numFlags_(bufSize / (srdSize * MaskBits)),
srdSize_(srdSize),
bufSize_(bufSize) {}
~SrdManager();
//! Allocates a new SRD slot for a resource
uint64_t allocSrdSlot(address* cpuAddr);
//! Frees a SRD slot
void freeSrdSlot(uint64_t addr);
// Fills the memory list for VidMM KMD
void fillResourceList(VirtualGPU& gpu);
private:
//! Disable copy constructor
SrdManager(const SrdManager&);
//! Disable assignment operator
SrdManager& operator=(const SrdManager&);
struct Chunk {
Memory* buf_;
uint* flags_;
Chunk() : buf_(NULL), flags_(NULL) {}
};
static constexpr uint MaskBits = 32;
const Device& dev_; //!< GPU device for the chunk manager
amd::Monitor ml_; //!< Global lock for the SRD manager
std::vector<Chunk> pool_; //!< Pool of SRD buffers
uint numFlags_; //!< Total number of flags in array
uint srdSize_; //!< SRD size
uint bufSize_; //!< Buffer size that holds SRDs
};
//! Initialise the whole GPU device subsystem
static bool init();
//! Shutdown the whole GPU device subsystem
static void tearDown();
//! Construct a new physical GPU device
Device();
//! Initialise a device (i.e. all parts of the constructor that could
//! potentially fail)
bool create(Pal::IDevice* device //!< PAL device interface object
);
//! Destructor for the physical GPU device
virtual ~Device();
//! Instantiate a new virtual device
device::VirtualDevice* createVirtualDevice(amd::CommandQueue* queue = NULL);
//! Memory allocation
virtual device::Memory* createMemory(amd::Memory& owner //!< abstraction layer memory object
) const;
virtual device::Memory* createMemory(size_t size, size_t alignment = 0) const;
//! Sampler object allocation
virtual bool createSampler(const amd::Sampler& owner, //!< abstraction layer sampler object
device::Sampler** sampler //!< device sampler object
) const;
//! Allocates a view object from the device memory
virtual device::Memory* createView(
amd::Memory& owner, //!< Owner memory object
const device::Memory& parent //!< Parent device memory object for the view
) const;
//! Signal object allocation
virtual device::Signal* createSignal() const { return new pal::Signal(); }
//! Create the device program.
virtual device::Program* createProgram(amd::Program& owner, amd::option::Options* options = NULL);
//! Attempt to bind with external graphics API's device/context
virtual bool bindExternalDevice(uint flags, void* const pDevice[], void* pContext,
bool validateOnly);
//! Attempt to unbind with external graphics API's device/context
virtual bool unbindExternalDevice(uint flags, void* const pDevice[], void* pContext,
bool validateOnly);
//! Free resource cache on device if OCL context was destroyed.
//! @note: Backend device doesn't track resources per context and releases all resources,
//! regardless the number of still active contexts
virtual void ContextDestroy() {
// The if condition is a best effort to avoid crash if the function is called after DLL detached
if (!amd::Runtime::isLibraryDetached()) {
resourceCache().free();
}
}
//! Validates kernel before execution
virtual bool validateKernel(const amd::Kernel& kernel, //!< AMD kernel object
const device::VirtualDevice* vdev, bool coop_group = false);
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
cl_set_device_clock_mode_output_amd* pSetClockModeOutput);
//! Retrieves information about free memory on a GPU device
virtual bool globalFreeMemory(size_t* freeMemory) const;
/**
* @brief Read data from a file to device memory.
* @param[IN] handle: file descriptor of the file to read.
* @param[IN] devicePtr: VRAM buffer pointer.
* @param[IN] size: size of read.
* @param[IN] file_offset: offset into fd where data has to be read.
* @param[IN/OUT] size_copied: actual size read.
* @param[IN/OUT] status: additional status.
*/
virtual bool amdFileRead(amd::Os::FileDesc handle, void* devicePtr, uint64_t size, int64_t file_offset,
uint64_t* size_copied, int32_t* status);
/**
* Write data from device memory to a file.
* @param[IN] handle: file descriptor of the file to write.
* @param[IN] devicePtr: VRAM buffer pointer.
* @param[IN] size: size of write.
* @param[IN] file_offset: offset into fd where data has to written.
* @param[IN/OUT] size_copied: actual size copied.
* @param[IN/OUT] status: additional status.
*/
virtual bool amdFileWrite(amd::Os::FileDesc handle, void* devicePtr, uint64_t size, int64_t file_offset,
uint64_t* size_copied, int32_t* status);
//! Returns a GPU memory object from AMD memory object
pal::Memory* getGpuMemory(amd::Memory* mem //!< Pointer to AMD memory object
) const;
amd::Monitor& lockAsyncOps() const { return lockAsyncOps_; }
//! Returns the lock object for the virtual gpus list
amd::Monitor& vgpusAccess() const { return vgpusAccess_; }
//! Returns the monitor object for PAL
amd::Monitor& lockPAL() const { return lockPAL_; }
//! Returns the monitor object for PAL
amd::Monitor& lockResources() const { return lockResourceOps_; }
//! Returns the number of virtual GPUs allocated on this device
uint numOfVgpus() const { return numOfVgpus_; }
uint numOfVgpus_; //!< The number of virtual GPUs (lock protected)
typedef std::vector<VirtualGPU*> VirtualGPUs;
//! Returns the list of all virtual GPUs running on this device
const VirtualGPUs& vgpus() const { return vgpus_; }
VirtualGPUs vgpus_; //!< The list of all running virtual gpus (lock protected)
//! Scratch buffer allocation
pal::Memory* createScratchBuffer(size_t size //!< Size of buffer
) const;
//! Returns transfer buffer object
XferBuffers& xferRead() const { return *xferRead_; }
//! Finds an appropriate map target
amd::Memory* findMapTarget(size_t size) const;
//! Adds a map target to the cache
bool addMapTarget(amd::Memory* memory) const;
//! Returns resource cache object
ResourceCache& resourceCache() const { return *resourceCache_; }
//! Returns the number of available compute rings
uint numComputeEngines() const { return computeEnginesId_.size(); }
//! Returns the vector of available compute rings with the engine index
const std::vector<uint32_t>& computeEnginesId() const { return computeEnginesId_; }
//! Returns the number of available compute rings
uint numExclusiveComputeEngines() const {
return exclusiveComputeEnginesId_.size() +
((exclusiveComputeEnginesId().find(ExclusiveQueueType::RealTime1) ==
exclusiveComputeEnginesId().end())
? 1
: 0);
}
//! Returns the map of available exclusive compute rings with the engine index
const std::map<ExclusiveQueueType, uint32_t>& exclusiveComputeEnginesId() const {
return exclusiveComputeEnginesId_;
}
//! Returns the number of available DMA engines
uint numDMAEngines() const { return numDmaEngines_; }
//! Returns engines object
const device::BlitManager& xferMgr() const;
VirtualGPU* xferQueue() const { return xferQueue_; }
//! Retrieves the internal format from the OCL format
Pal::ChNumFormat getPalFormat(const amd::Image::Format& format, //! OCL image format
Pal::ChannelMapping* channel) const;
const ScratchBuffer* scratch(uint idx) const { return scratch_[idx]; }
//! Returns the global scratch buffer
Memory* globalScratchBuf() const { return globalScratchBuf_; };
//! Destroys scratch buffer memory
void destroyScratchBuffers();
//! Initialize heap resources if uninitialized
bool initializeHeapResources();
//! Set HW sampler to the specified state
void fillHwSampler(uint32_t state, //!< Sampler's OpenCL state
void* hwState, //!< Sampler's HW state
uint32_t hwStateSize, //!< Size of sampler's HW state
const uint* addressMode, //!< Address modes in X, Y and Z
uint32_t mipFilter = CL_FILTER_NONE, //!< Mip filter
float minLod = 0.f, //!< Min level of detail
float maxLod = CL_MAXFLOAT //!< Max level of detail
) const;
//! host memory alloc
virtual void* hostAlloc(size_t size, size_t alignment, MemorySegment mem_seg = kNoAtomics,
const void* agentInfo = nullptr) const override;
//! SVM allocation
virtual void* svmAlloc(amd::Context& context, size_t size, size_t alignment,
cl_svm_mem_flags flags, void* svmPtr) const;
bool allowPeerAccess(device::Memory* memory) const;
//! Free host SVM memory
void hostFree(void* ptr, size_t size) const;
//! SVM free
virtual void svmFree(void* ptr) const;
//! Virtual address space allocation(reservation)
virtual void* virtualAlloc(void* addr, size_t size, size_t alignment);
virtual bool virtualFree(void* addr);
//! Set/Get memory access set by the app
virtual bool SetMemAccess(void* va_addr, size_t va_size, VmmAccess access_flags,
VmmLocationType = VmmLocationType::kDevice);
virtual bool GetMemAccess(void* va_addr, VmmAccess* access_flags_ptr) const;
virtual bool ValidateMemAccess(amd::Memory& mem, bool read_write) const;
virtual bool ExportShareableVMMHandle(amd::Memory& amd_mem_obj, int flags, void* shareableHandle);
virtual amd::Memory* ImportShareableVMMHandle(void* osHandle);
//! Returns SRD manger object
SrdManager& srds() const { return *srdManager_; }
//! Returns PAL device properties
const Pal::DeviceProperties& properties() const { return properties_; }
//! Returns PAL platform interface
Pal::IPlatform* iPlat() const { return platform_; }
//! Returns PAL device interface
Pal::IDevice* iDev() const { return device_; }
//! Allow access for peer device
bool deviceAllowAccess(void* dst) const;
//! Returns a handle to the capture manager (RGP or UberTrace)
ICaptureMgr* captureMgr() const { return captureMgr_; }
//! Update free memory for OCL extension
void updateAllocedMemory(Pal::GpuHeap heap, //!< PAL GPU heap for update
Pal::gpusize size, //!< Size of alocated/destroyed memory
bool free //!< TRUE if runtime frees memory
) const;
//! Create internal blit program
bool createBlitProgram();
//! Interop for GL device
bool initGLInteropPrivateExt(void* GLplatformContext, void* GLdeviceContext) const;
bool glCanInterop(void* GLplatformContext, void* GLdeviceContext) const;
bool resGLAssociate(void* GLContext, uint name, uint type, Pal::OsExternalHandle* handle,
void** mbResHandle, size_t* offset, cl_image_format& newClFormat
#ifdef ATI_OS_WIN
,
Pal::DoppDesktopInfo& doppDesktopInfo
#endif
) const;
bool resGLAcquire(void* GLplatformContext, void* mbResHandle, uint type) const;
bool resGLRelease(void* GLplatformContext, void* mbResHandle, uint type) const;
bool resGLFree(void* GLplatformContext, void* mbResHandle, uint type) const;
//! Adds a resource to the global list
void addResource(Resource* res) const {
amd::ScopedLock lock(lockResources());
auto findIt = resourceList_->find(res);
res->resizeGpuEvents(numOfVgpus() - 1);
if (resourceList_->end() == findIt) {
resourceList_->insert(res);
}
}
//! Removes a resource from the global list
void removeResource(Resource* res) const {
amd::ScopedLock lock(lockResources());
resourceList_->erase(res);
}
//! Resizes global resource list to accumulate a new queue
void resizeResoureList(uint index) const {
// Not safe to resize the list when runtime creates/destroys a queue at the same time
// or other queues process a command, since the size of the TS array can change
Device::ScopedLockVgpus v(*this);
amd::ScopedLock r(lockResources());
for (const auto& it : *resourceList_) {
it->resizeGpuEvents(index);
}
}
//! Erases an old queue from the list
void eraseResoureList(uint index) const {
amd::ScopedLock lock(lockResources());
for (const auto& it : *resourceList_) {
it->eraseGpuEvents(index);
}
}
bool AcquireExclusiveGpuAccess();
void ReleaseExclusiveGpuAccess(VirtualGPU& vgpu) const;
//! Returns PAL Queue pool for recycling
std::map<Pal::IQueue*, QueueRecycleInfo*>& QueuePool() { return queue_pool_; }
const std::map<Pal::IQueue*, QueueRecycleInfo*>& QueuePool() const { return queue_pool_; }
virtual bool findLinkInfo(const amd::Device& other_device, std::vector<LinkAttrType>* link_attr) {
// Unsupported in PAL
LogError("The function is unsupported on Windows");
return false;
}
virtual bool importExtSemaphore(void** extSemaphore, const amd::Os::FileDesc& handle,
amd::ExternalSemaphoreHandleType sem_handle_type) override;
virtual void DestroyExtSemaphore(void* extSemaphore);
#if defined(__clang__)
#if __has_feature(address_sanitizer)
virtual device::UriLocator* createUrilocator() const { return nullptr; }
#endif
#endif
//! Allocates hidden heap for device memory allocations
void HiddenHeapAlloc(const VirtualGPU& gpu);
Pal::gpusize GetMaxFrameBuffer() const { return maxFrameBufferAllocation_; }
Pal::gpusize TotalAlloc() const {
Pal::gpusize local = allocedMem[Pal::GpuHeapLocal] - resourceCache().persistentCacheSize();
Pal::gpusize invisible = allocedMem[Pal::GpuHeapInvisible] - resourceCache().lclCacheSize();
Pal::gpusize total_alloced = local + invisible;
return total_alloced;
}
private:
static void PAL_STDCALL PalDeveloperCallback(void* pPrivateData, const Pal::uint32 deviceIndex,
Pal::Developer::CallbackType type, void* pCbData);
//! Disable copy constructor
Device(const Device&);
//! Disable assignment
Device& operator=(const Device&);
//! Sends the stall command to all queues
bool stallQueues();
//! Buffer allocation
pal::Memory* createBuffer(amd::Memory& owner, //!< Abstraction layer memory object
bool directAccess //!< Use direct host memory access
) const;
//! Image allocation
pal::Memory* createImage(amd::Memory& owner, //!< Abstraction layer memory object
bool directAccess //!< Use direct host memory access
) const;
//! Allocates/reallocates the scratch buffer, according to the usage
bool allocScratch(uint regNum, //!< Number of the scratch registers
const VirtualGPU* vgpu, //!< Virtual GPU for the allocation
uint vgprs //!< Used VGPRs in the kernel
);
//! Interop for D3D devices
bool associateD3D11Device(void* d3d11Device //!< void* is of type ID3D11Device*
);
bool associateD3D10Device(void* d3d10Device //!< void* is of type ID3D10Device*
);
bool associateD3D9Device(void* d3d9Device //!< void* is of type IDirect3DDevice9*
);
//! Interop for GL device
bool glAssociate(void* GLplatformContext, void* GLdeviceContext) const;
bool glDissociate(void* GLplatformContext, void* GLdeviceContext) const;
static char* platformObj_; //!< Memory allocated for PAL platform object
static Pal::IPlatform* platform_; //!< Pointer to the PAL platform object
mutable amd::Monitor lockAsyncOps_; //!< Lock to serialise all async ops on this device
//! Lock to serialise all async ops on initialization heap operation
mutable amd::Monitor lockForInitHeap_;
mutable amd::Monitor lockPAL_; //!< Lock to serialise PAL access
mutable amd::Monitor vgpusAccess_; //!< Lock to serialise virtual gpu list access
mutable amd::Monitor scratchAlloc_; //!< Lock to serialise scratch allocation
mutable amd::Monitor mapCacheOps_; //!< Lock to serialise cache for the map resources
mutable amd::Monitor lockResourceOps_; //!< Lock to serialise resource access
mutable std::mutex lockAllowAccess_; //!< To serialize allow_access calls
XferBuffers* xferRead_; //!< Transfer buffers read
std::vector<amd::Memory*>* mapCache_; //!< Map cache info structure
ResourceCache* resourceCache_; //!< Resource cache
std::map<ExclusiveQueueType, uint32_t>
exclusiveComputeEnginesId_; //!< The number of available compute engines
std::vector<uint32_t> computeEnginesId_; //!< PAL index for compute engine
uint numDmaEngines_; //!< The number of available compute engines
bool heapInitComplete_; //!< Keep track of initialization status of heap resources
VirtualGPU* xferQueue_; //!< Transfer queue
std::vector<ScratchBuffer*> scratch_; //!< Scratch buffers for kernels
Memory* globalScratchBuf_; //!< Global scratch buffer
SrdManager* srdManager_; //!< SRD manager object
static AppProfile appProfile_; //!< application profile
mutable bool freeCPUMem_ = false; //!< flag to mark GPU free SVM CPU mem
Pal::DeviceProperties properties_; //!< PAL device properties
Pal::IDevice* device_; //!< PAL device object
mutable std::atomic<Pal::gpusize>
allocedMem[Pal::GpuHeap::GpuHeapCount]; //!< Free memory counter
std::unordered_set<Resource*>* resourceList_; //!< Active resource list
ICaptureMgr* captureMgr_; //!< RGP/UberTrace capture manager
Pal::GpuMemoryHeapProperties
heaps_[Pal::GpuHeapCount]; //!< Information about heaps, returned from PAL
Pal::gpusize maxFrameBufferAllocation_; //!< To reserve some memory in frame buffer
std::map<Pal::IQueue*, QueueRecycleInfo*> queue_pool_; //!< Pool of PAL queues for recycling
amd::Program* trap_handler_ = nullptr; //!< Trap handler program for debugger setup
};
/*@}*/ // namespace amd::pal
} // namespace amd::pal