647aba6ed2
ECR #399840 - re-checkin of CL1109955 with the fix of OpenCL sanity check timeout (hw debug flag initialization) Affected files ... ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.cpp#4 edit ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_debugger_amd.h#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.cpp#174 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#238 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugger.hpp#3 add ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.cpp#3 add ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudebugmanager.hpp#3 add ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#490 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#137 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#275 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#106 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuresource.cpp#200 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuscsi.cpp#30 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusettings.cpp#297 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#346 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.hpp#124 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLContext.cpp#69 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gslbe/src/rt/GSLContext.h#42 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.cpp#3 add ... //depot/stg/opencl/drivers/opencl/runtime/device/hwdebug.hpp#4 edit ... //depot/stg/opencl/drivers/opencl/runtime/utils/flags.hpp#223 edit
644 行
21 KiB
C++
644 行
21 KiB
C++
//
|
|
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
#ifndef GPU_HPP_
|
|
#define GPU_HPP_
|
|
|
|
#include "top.hpp"
|
|
#include "device/device.hpp"
|
|
#include "platform/command.hpp"
|
|
#include "platform/program.hpp"
|
|
#include "platform/perfctr.hpp"
|
|
#include "platform/threadtrace.hpp"
|
|
#include "platform/memory.hpp"
|
|
#include "utils/concurrent.hpp"
|
|
#include "thread/thread.hpp"
|
|
#include "thread/monitor.hpp"
|
|
#include "device/gpu/gpuvirtual.hpp"
|
|
#include "device/gpu/gpumemory.hpp"
|
|
#include "device/gpu/gpudefs.hpp"
|
|
#include "device/gpu/gpusettings.hpp"
|
|
#include "device/gpu/gpuappprofile.hpp"
|
|
|
|
|
|
#include "acl.h"
|
|
#include "vaminterface.h"
|
|
|
|
/*! \addtogroup GPU
|
|
* @{
|
|
*/
|
|
|
|
//! GPU Device Implementation
|
|
namespace gpu {
|
|
|
|
//! A nil device object
|
|
class NullDevice : public amd::Device
|
|
{
|
|
protected:
|
|
static aclCompiler* compiler_;
|
|
static aclCompiler* hsaCompiler_;
|
|
public:
|
|
aclCompiler* compiler() const { return compiler_; }
|
|
aclCompiler* hsaCompiler() const { return hsaCompiler_; }
|
|
|
|
public:
|
|
static bool init(void);
|
|
|
|
//! Construct a new identifier
|
|
NullDevice();
|
|
|
|
//! Creates an offline device with the specified target
|
|
bool create(
|
|
CALtarget target //!< GPU device identifier
|
|
);
|
|
|
|
virtual cl_int createSubDevices(
|
|
device::CreateSubDevicesInfo& create_info,
|
|
cl_uint num_entries,
|
|
cl_device_id* devices,
|
|
cl_uint* num_devices) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
|
|
//! Instantiate a new virtual device
|
|
virtual device::VirtualDevice* createVirtualDevice(
|
|
bool profiling,
|
|
bool interopQueue
|
|
#if cl_amd_open_video
|
|
, void* calVideoProperties = NULL
|
|
#endif // cl_amd_open_video
|
|
, uint deviceQueueSize = 0
|
|
) { return NULL; }
|
|
|
|
//! Compile the given source code.
|
|
virtual device::Program* createProgram(int oclVer = 120);
|
|
|
|
//! Just returns NULL for the dummy device
|
|
virtual device::Memory* createMemory(amd::Memory& owner) const { return NULL; }
|
|
|
|
//! Sampler object allocation
|
|
virtual bool createSampler(
|
|
const amd::Sampler& owner, //!< abstraction layer sampler object
|
|
device::Sampler** sampler //!< device sampler object
|
|
) const
|
|
{
|
|
ShouldNotReachHere();
|
|
return true;
|
|
}
|
|
|
|
//! Just returns NULL for the dummy device
|
|
virtual device::Memory* createView(
|
|
amd::Memory& owner, //!< Owner memory object
|
|
const device::Memory& parent //!< Parent device memory object for the view
|
|
) const { return NULL; }
|
|
|
|
//! Reallocates the provided buffer object
|
|
virtual bool reallocMemory(amd::Memory& owner) const { return true; }
|
|
|
|
//! Acquire external graphics API object in the host thread
|
|
//! Needed for OpenGL objects on CPU device
|
|
|
|
virtual bool bindExternalDevice(
|
|
intptr_t type, void* pDevice, void* pContext, bool validateOnly) { return true; }
|
|
|
|
virtual bool unbindExternalDevice(
|
|
intptr_t type, void* pDevice, void* pContext, bool validateOnly) { return true; }
|
|
|
|
//! Gets a pointer to a region of host-visible memory for use as the target
|
|
//! of a non-blocking map for a given memory object
|
|
virtual void* allocMapTarget(
|
|
amd::Memory& mem, //!< Abstraction layer memory object
|
|
const amd::Coord3D& origin, //!< The map location in memory
|
|
const amd::Coord3D& region, //!< The map region in memory
|
|
uint mapFlags, //!< Map flags
|
|
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
|
|
size_t* slicePitch = NULL //!< Slice for the mapped memory
|
|
) { return NULL; }
|
|
|
|
//! Releases non-blocking map target memory
|
|
virtual void freeMapTarget(amd::Memory& mem, void* target) {}
|
|
|
|
CALtarget calTarget() const { return calTarget_; }
|
|
|
|
const AMDDeviceInfo* hwInfo() const { return hwInfo_; }
|
|
|
|
//! Empty implementation on Null device
|
|
virtual bool globalFreeMemory(size_t* freeMemory) const { return false; }
|
|
|
|
//! Get GPU device settings
|
|
const gpu::Settings& settings() const
|
|
{ return reinterpret_cast<gpu::Settings&>(*settings_); }
|
|
virtual void* svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_mem_flags flags, void* svmPtr) const { return NULL; }
|
|
virtual void svmFree(void* ptr) const {return;}
|
|
|
|
protected:
|
|
CALtarget calTarget_; //!< GPU device identifier
|
|
const AMDDeviceInfo* hwInfo_; //!< Device HW info structure
|
|
};
|
|
|
|
//! Forward declarations of gpu:: types referenced by the classes below
class Command;
class Device;
class GpuCommand;
class Heap;
class HeapBlock;
class Program;
class Kernel;
class Memory;
class Resource;
class VirtualDevice;
class PrintfDbg;
class ThreadTrace;
|
|
|
|
class Sampler : public device::Sampler
|
|
{
|
|
public:
|
|
//! Constructor
|
|
Sampler(const Device& dev): dev_(dev) {}
|
|
|
|
//! Default destructor for the device memory object
|
|
virtual ~Sampler();
|
|
|
|
//! Creates a device sampler from the OCL sampler state
|
|
bool create(
|
|
uint32_t oclSamplerState //!< OCL sampler state
|
|
);
|
|
|
|
const void* hwState() const { return hwState_; }
|
|
|
|
private:
|
|
//! Disable default copy constructor
|
|
Sampler& operator=(const Sampler&);
|
|
|
|
//! Disable operator=
|
|
Sampler(const Sampler&);
|
|
|
|
const Device& dev_; //!< Device object associated with the sampler
|
|
address hwState_; //!< GPU HW state (\todo legacy path)
|
|
};
|
|
|
|
//! A GPU device ordinal (physical GPU device)
|
|
class Device : public NullDevice, public CALGSLDevice
|
|
{
|
|
public:
|
|
//! Locks any access to the virtual GPUs
|
|
class ScopedLockVgpus : public amd::StackObject {
|
|
public:
|
|
//! Default constructor
|
|
ScopedLockVgpus(const Device& dev);
|
|
|
|
//! Destructor
|
|
~ScopedLockVgpus();
|
|
|
|
private:
|
|
const Device& dev_; //! Device object
|
|
};
|
|
|
|
//! Interop emulation flags (distinct bits, so they can be OR-ed together)
enum InteropEmulationFlags
{
    D3D10Device = 0x00000001,   //!< bit 0
    GLContext   = 0x00000002    //!< bit 1
};
|
|
|
|
class Engines : public amd::EmbeddedObject
|
|
{
|
|
public:
|
|
//! Default constructor
|
|
Engines() { memset(desc_, 0xff, sizeof(desc_)); }
|
|
|
|
//! Creates engine descriptor for this class
|
|
void create(uint num, gslEngineDescriptor* desc, uint maxNumComputeRings);
|
|
|
|
//! Gets engine type mask
|
|
uint getMask(gslEngineID id) const { return (1 << id); }
|
|
|
|
//! Gets a descriptor for the requested engines
|
|
uint getRequested(uint engines, gslEngineDescriptor* desc) const;
|
|
|
|
//! Returns the number of available compute rings
|
|
uint numComputeRings() const { return numComputeRings_; }
|
|
|
|
private:
|
|
uint numComputeRings_;
|
|
gslEngineDescriptor desc_[GSL_ENGINEID_MAX]; //!< Engine descriptor
|
|
};
|
|
|
|
//! Transfer buffers
|
|
class XferBuffers : public amd::HeapObject
|
|
{
|
|
public:
|
|
static const size_t MaxXferBufListSize = 8;
|
|
|
|
//! Default constructor
|
|
XferBuffers(const Device& device, Resource::MemoryType type, size_t bufSize)
|
|
: type_(type)
|
|
, bufSize_(bufSize)
|
|
, acquiredCnt_(0)
|
|
, gpuDevice_(device)
|
|
{}
|
|
|
|
//! Default destructor
|
|
~XferBuffers();
|
|
|
|
//! Creates the xfer buffers object
|
|
bool create();
|
|
|
|
//! Acquires an instance of the transfer buffers
|
|
Resource& acquire();
|
|
|
|
//! Releases transfer buffer
|
|
void release(
|
|
VirtualGPU& gpu, //!< Virual GPU object used with the buffer
|
|
Resource& buffer //!< Transfer buffer for release
|
|
);
|
|
|
|
//! Returns the buffer's size for transfer
|
|
size_t bufSize() const { return bufSize_; }
|
|
|
|
private:
|
|
//! Disable copy constructor
|
|
XferBuffers(const XferBuffers&);
|
|
|
|
//! Disable assignment operator
|
|
XferBuffers& operator=(const XferBuffers&);
|
|
|
|
//! Get device object
|
|
const Device& dev() const { return gpuDevice_; }
|
|
|
|
Resource::MemoryType type_; //!< The buffer's type
|
|
size_t bufSize_; //!< Staged buffer size
|
|
std::list<Resource*> freeBuffers_; //!< The list of free buffers
|
|
amd::Atomic<uint> acquiredCnt_; //!< The total number of acquired buffers
|
|
amd::Monitor lock_; //!< Stgaed buffer acquire/release lock
|
|
const Device& gpuDevice_; //!< GPU device object
|
|
};
|
|
|
|
//! Virtual address cache entry
|
|
struct VACacheEntry : public amd::HeapObject
|
|
{
|
|
void* startAddress_; //!< Start virtual address
|
|
void* endAddress_; //!< End virtual address
|
|
Memory* memory_; //!< GPU memory, associated with the range
|
|
|
|
//! Constructor
|
|
VACacheEntry(
|
|
void* startAddress, //!< Start virtual address
|
|
void* endAddress, //!< End virtual address
|
|
Memory* memory //!< GPU memory object
|
|
): startAddress_(startAddress), endAddress_(endAddress), memory_(memory) {}
|
|
|
|
private:
|
|
//! Disable default constructor
|
|
VACacheEntry();
|
|
};
|
|
|
|
struct ScratchBuffer : public amd::HeapObject
|
|
{
|
|
uint regNum_; //!< The number of used scratch registers
|
|
std::vector<Memory*> memObjs_; //!< Memory objects for scratch buffers
|
|
uint offset_; //!< Offset from the global scratch store
|
|
uint size_; //!< Scratch buffer size on this queue
|
|
|
|
//! Default constructor
|
|
ScratchBuffer(uint numMems): regNum_(0), memObjs_(numMems), offset_(0) {}
|
|
|
|
//! Default constructor
|
|
~ScratchBuffer();
|
|
|
|
//! Destroys memory objects
|
|
void destroyMemory();
|
|
};
|
|
|
|
|
|
class SrdManager : public amd::HeapObject {
|
|
public:
|
|
SrdManager(const Device& dev, uint srdSize, uint bufSize)
|
|
: dev_(dev)
|
|
, numFlags_(bufSize / (srdSize * MaskBits))
|
|
, srdSize_(srdSize)
|
|
, bufSize_(bufSize) {}
|
|
~SrdManager();
|
|
|
|
//! Allocates a new SRD slot for a resource
|
|
uint64_t allocSrdSlot(address* cpuAddr);
|
|
|
|
//! Frees a SRD slot
|
|
void freeSrdSlot(uint64_t addr);
|
|
|
|
// Fills the resource list for VidMM KMD
|
|
void fillResourceList(std::vector<const Resource*>& memList);
|
|
|
|
private:
|
|
//! Disable copy constructor
|
|
SrdManager(const SrdManager&);
|
|
|
|
//! Disable assignment operator
|
|
SrdManager& operator=(const SrdManager&);
|
|
|
|
struct Chunk {
|
|
Memory* buf_;
|
|
uint* flags_;
|
|
Chunk(): buf_(NULL), flags_(NULL) {}
|
|
};
|
|
|
|
static const uint MaskBits = 32;
|
|
const Device& dev_; //!< GPU device for the chunk manager
|
|
amd::Monitor ml_; //!< Global lock for the SRD manager
|
|
std::vector<Chunk> pool_; //!< Pool of SRD buffers
|
|
uint numFlags_; //!< Total number of flags in array
|
|
uint srdSize_; //!< SRD size
|
|
uint bufSize_; //!< Buffer size that holds SRDs
|
|
};
|
|
|
|
//! Initialise the whole GPU device subsystem (CAL init, device enumeration, etc).
|
|
static bool init();
|
|
|
|
//! Shutdown the whole GPU device subsystem (CAL shutdown).
|
|
static void tearDown();
|
|
|
|
//! Construct a new physical GPU device
|
|
Device();
|
|
|
|
//! Initialise a device (i.e. all parts of the constructor that could
|
|
//! potentially fail)
|
|
bool create(
|
|
CALuint ordinal, //!< GPU device ordinal index. Starts from 0
|
|
CALuint numOfDevices //!< number of GPU devices in the system
|
|
);
|
|
|
|
//! Destructor for the physical GPU device
|
|
virtual ~Device();
|
|
|
|
//! Reallocates current global heap
|
|
bool reallocHeap(
|
|
size_t size, //!< requested size for reallocation
|
|
bool remoteAlloc //!< allocate the new heap in remote memory
|
|
);
|
|
|
|
//! Instantiate a new virtual device
|
|
device::VirtualDevice* createVirtualDevice(
|
|
bool profiling,
|
|
bool interopQueue
|
|
#if cl_amd_open_video
|
|
, void* calVideoProperties = NULL
|
|
#endif // cl_amd_open_video
|
|
, uint deviceQueueSize = 0
|
|
);
|
|
|
|
//! Memory allocation
|
|
virtual device::Memory* createMemory(
|
|
amd::Memory& owner //!< abstraction layer memory object
|
|
) const;
|
|
|
|
//! Sampler object allocation
|
|
virtual bool createSampler(
|
|
const amd::Sampler& owner, //!< abstraction layer sampler object
|
|
device::Sampler** sampler //!< device sampler object
|
|
) const;
|
|
|
|
//! Reallocates the provided buffer object
|
|
virtual bool reallocMemory(
|
|
amd::Memory& owner //!< Buffer for reallocation
|
|
) const;
|
|
|
|
//! Allocates a view object from the device memory
|
|
virtual device::Memory* createView(
|
|
amd::Memory& owner, //!< Owner memory object
|
|
const device::Memory& parent //!< Parent device memory object for the view
|
|
) const;
|
|
|
|
//! Create the device program.
|
|
virtual device::Program* createProgram(int oclVer = 120);
|
|
|
|
//! Attempt to bind with external graphics API's device/context
|
|
virtual bool bindExternalDevice(
|
|
intptr_t type,
|
|
void* pDevice,
|
|
void* pContext,
|
|
bool validateOnly);
|
|
|
|
//! Attempt to unbind with external graphics API's device/context
|
|
virtual bool unbindExternalDevice(
|
|
intptr_t type,
|
|
void* pDevice,
|
|
void* pContext,
|
|
bool validateOnly);
|
|
|
|
//! Validates kernel before execution
|
|
virtual bool validateKernel(
|
|
const amd::Kernel& kernel, //!< AMD kernel object
|
|
const device::VirtualDevice* vdev
|
|
);
|
|
|
|
//! Gets a pointer to a region of host-visible memory for use as the target
|
|
//! of a non-blocking map for a given memory object
|
|
virtual void* allocMapTarget(
|
|
amd::Memory& mem, //!< Abstraction layer memory object
|
|
const amd::Coord3D& origin, //!< The map location in memory
|
|
const amd::Coord3D& region, //!< The map region in memory
|
|
uint mapFlags, //!< Map flags
|
|
size_t* rowPitch = NULL, //!< Row pitch for the mapped memory
|
|
size_t* slicePitch = NULL //!< Slice for the mapped memory
|
|
);
|
|
|
|
//! Retrieves information about free memory on a GPU device
|
|
virtual bool globalFreeMemory(size_t* freeMemory) const;
|
|
|
|
//! Returns a GPU memory object from AMD memory object
|
|
gpu::Memory* getGpuMemory(
|
|
amd::Memory* mem //!< Pointer to AMD memory object
|
|
) const;
|
|
|
|
//! Gets the GPU resource associated with the global heap
|
|
const Resource& globalMem() const { return heap_->resource(); }
|
|
|
|
//! Gets the global heap object
|
|
const Heap* heap() const { return heap_; }
|
|
|
|
//! Allocates a heap block from the global heap
|
|
HeapBlock* allocHeapBlock(
|
|
size_t size //!< The heap block size for allocation
|
|
) const;
|
|
|
|
//! Gets the memory object for the dummy page
|
|
amd::Memory* dummyPage() const { return dummyPage_; }
|
|
|
|
amd::Monitor& lockAsyncOps() const { return *lockAsyncOps_; }
|
|
|
|
//! Returns the lock object for the virtual gpus list
|
|
amd::Monitor* vgpusAccess() const { return vgpusAccess_; }
|
|
|
|
//! Returns the number of virtual GPUs allocated on this device
|
|
uint numOfVgpus() const { return numOfVgpus_; }
|
|
uint numOfVgpus_; //!< The number of virtual GPUs (lock protected)
|
|
|
|
typedef std::vector<VirtualGPU*> VirtualGPUs;
|
|
|
|
//! Returns the list of all virtual GPUs running on this device
|
|
const VirtualGPUs vgpus() const { return vgpus_; }
|
|
VirtualGPUs vgpus_; //!< The list of all running virtual gpus (lock protected)
|
|
|
|
//! Scratch buffer allocation
|
|
gpu::Memory* createScratchBuffer(
|
|
size_t size //!< Size of buffer
|
|
) const;
|
|
|
|
//! Returns transfer buffer object
|
|
XferBuffers& xferWrite() const { return *xferWrite_; }
|
|
|
|
//! Returns transfer buffer object
|
|
XferBuffers& xferRead() const { return *xferRead_; }
|
|
|
|
//! Adds GPU memory to the VA cache list
|
|
void addVACache(Memory* memory) const;
|
|
|
|
//! Removes GPU memory from the VA cache list
|
|
void removeVACache(const Memory* memory) const;
|
|
|
|
//! Finds GPU memory from virtual address
|
|
Memory* findMemoryFromVA(const void* ptr, size_t* offset) const;
|
|
|
|
//! Finds an appropriate map target
|
|
amd::Memory* findMapTarget(size_t size) const;
|
|
|
|
//! Adds a map target to the cache
|
|
bool addMapTarget(amd::Memory* memory) const;
|
|
|
|
//! Returns resource cache object
|
|
ResourceCache& resourceCache() const { return *resourceCache_; }
|
|
|
|
//! Returns engines object
|
|
const Engines& engines() const { return engines_; }
|
|
|
|
//! Returns engines object
|
|
const device::BlitManager& xferMgr() const { return xferQueue_->blitMgr(); }
|
|
|
|
VirtualGPU* xferQueue() const { return xferQueue_; }
|
|
|
|
//! Retrieves the internal format from the OCL format
|
|
CalFormat getCalFormat(
|
|
const amd::Image::Format& format //! OCL image format
|
|
) const;
|
|
|
|
//! Retrieves the OCL format from the internal image format
|
|
amd::Image::Format getOclFormat(
|
|
const CalFormat& format //! Internal image format
|
|
) const;
|
|
|
|
const ScratchBuffer* scratch(uint idx) const { return scratch_[idx]; }
|
|
|
|
//! Returns the global scratch buffer
|
|
Memory* globalScratchBuf() const { return globalScratchBuf_; };
|
|
|
|
//! Destroys scratch buffer memory
|
|
void destroyScratchBuffers();
|
|
|
|
//! Initialize heap resources if uninitialized
|
|
bool initializeHeapResources();
|
|
|
|
//! Set GSL sampler to the specified state
|
|
void fillHwSampler(
|
|
uint32_t state, //!< Sampler's OpenCL state
|
|
void* hwState, //!< Sampler's HW state
|
|
uint32_t hwStateSize //!< Size of sampler's HW state
|
|
) const;
|
|
|
|
//! host memory alloc
|
|
virtual void* hostAlloc(size_t size, size_t alignment, bool atomics = false) const;
|
|
|
|
//! SVM allocation
|
|
virtual void* svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_mem_flags flags, void* svmPtr) const;
|
|
|
|
//! Free host SVM memory
|
|
void hostFree(void* ptr, size_t size) const;
|
|
|
|
//! SVM free
|
|
virtual void svmFree(void* ptr) const;
|
|
|
|
//! Returns SRD manger object
|
|
SrdManager& srds() const { return *srdManager_; }
|
|
|
|
//! Initial the Hardware Debug Manager
|
|
cl_int hwDebugManagerInit(amd::Context *context, uintptr_t messageStorage);
|
|
|
|
//! Remove the Hardware Debug Manager
|
|
void hwDebugManagerRemove();
|
|
|
|
private:
|
|
//! Disable copy constructor
|
|
Device(const Device&);
|
|
|
|
//! Disable assignment
|
|
Device& operator=(const Device&);
|
|
|
|
//! Sends the stall command to all queues
|
|
bool stallQueues();
|
|
|
|
//! Fills OpenCL device info structure
|
|
void fillDeviceInfo(
|
|
const CALdeviceattribs& calAttr, //!< CAL device attributes info
|
|
const CALdevicestatus& calStatus //!< CAL device status
|
|
#if cl_amd_open_video
|
|
,
|
|
const CALdeviceVideoAttribs& calVideoAttr //!< -"- video attrib. info
|
|
#endif //cl_amd_open_video
|
|
);
|
|
|
|
//! Buffer allocation from static heap (no VM mode only)
|
|
gpu::Memory* createBufferFromHeap(
|
|
amd::Memory& owner //!< Abstraction layer memory object
|
|
) const;
|
|
|
|
//! Buffer allocation
|
|
gpu::Memory* createBuffer(
|
|
amd::Memory& owner, //!< Abstraction layer memory object
|
|
bool directAccess, //!< Use direct host memory access
|
|
bool bufferAlloc //!< If TRUE, then don't use heap
|
|
) const;
|
|
|
|
//! Image allocation
|
|
gpu::Memory* createImage(
|
|
amd::Memory& owner, //!< Abstraction layer memory object
|
|
bool directAccess //!< Use direct host memory access
|
|
) const;
|
|
|
|
//! Allocates/reallocates the scratch buffer, according to the usage
|
|
bool allocScratch(
|
|
uint regNum, //!< Number of the scratch registers
|
|
const VirtualGPU* vgpu //!< Virtual GPU for the allocation
|
|
);
|
|
|
|
amd::Context* context_; //!< A dummy context for internal allocations
|
|
size_t heapSize_; //!< The global heap size
|
|
Heap* heap_; //!< GPU heap manager
|
|
amd::Memory* dummyPage_; //!< A dummy page for NULL pointer
|
|
|
|
amd::Monitor* lockAsyncOps_; //!< Lock to serialise all async ops on this device
|
|
amd::Monitor* lockAsyncOpsForInitHeap_; //!< Lock to serialise all async ops on initialization heap operation
|
|
amd::Monitor* vgpusAccess_; //!< Lock to serialise virtual gpu list access
|
|
amd::Monitor* scratchAlloc_; //!< Lock to serialise scratch allocation
|
|
amd::Monitor* mapCacheOps_; //!< Lock to serialise cache for the map resources
|
|
|
|
XferBuffers* xferRead_; //!< Transfer buffers read
|
|
XferBuffers* xferWrite_; //!< Transfer buffers write
|
|
|
|
amd::Monitor* vaCacheAccess_; //!< Lock to serialize VA caching access
|
|
std::list<VACacheEntry*>* vaCacheList_; //!< VA cache list
|
|
std::vector<amd::Memory*>* mapCache_; //!< Map cache info structure
|
|
ResourceCache* resourceCache_; //!< CAL resource cache
|
|
Engines engines_; //!< Available engines on device
|
|
bool heapInitComplete_; //!< Keep track of initialization status of heap resources
|
|
VirtualGPU* xferQueue_; //!< Transfer queue
|
|
std::vector<ScratchBuffer*> scratch_; //!< Scratch buffers for kernels
|
|
Memory* globalScratchBuf_; //!< Global scratch buffer
|
|
SrdManager* srdManager_; //!< SRD manager object
|
|
|
|
static AppProfile appProfile_; //!< application profile
|
|
};
|
|
|
|
/*@}*/} // namespace gpu
|
|
|
|
#endif /*GPU_HPP_*/
|