Files
rocm-systems/rocclr/runtime/device/gpu/gpudevice.hpp
T
foreman 768a8e7e78 P4 to Git Change 1211287 by xcui@merged_opencl_jxcwin on 2015/11/13 19:19:52
SWDEV-77172 - temporary disable the FGS optimization for a failure on stg.

	code review:
	http://ocltc.amd.com/reviews/r/8992/
	precheckin:
	http://ocltc.amd.com:8111/viewModification.html?modId=62238&personal=true&buildTypeId=&tab=vcsModificationBuilds&show_all_builds=true

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#263 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#537 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.hpp#156 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#310 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#391 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/context.cpp#37 edit
2015-11-13 19:47:56 -05:00

638 řádky
21 KiB
C++

//
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
//
#ifndef GPU_HPP_
#define GPU_HPP_
#include "top.hpp"
#include "device/device.hpp"
#include "platform/command.hpp"
#include "platform/program.hpp"
#include "platform/perfctr.hpp"
#include "platform/threadtrace.hpp"
#include "platform/memory.hpp"
#include "utils/concurrent.hpp"
#include "thread/thread.hpp"
#include "thread/monitor.hpp"
#include "device/gpu/gpuvirtual.hpp"
#include "device/gpu/gpumemory.hpp"
#include "device/gpu/gpudefs.hpp"
#include "device/gpu/gpusettings.hpp"
#include "device/gpu/gpuappprofile.hpp"
#include "acl.h"
#include "vaminterface.h"
/*! \addtogroup GPU
* @{
*/
//! GPU Device Implementation
namespace gpu {
//! A nil device object
class NullDevice : public amd::Device
{
protected:
static aclCompiler* compiler_;
static aclCompiler* hsaCompiler_;
public:
aclCompiler* compiler() const { return compiler_; }
aclCompiler* hsaCompiler() const { return hsaCompiler_; }
public:
static bool init(void);
//! Construct a new identifier
NullDevice();
//! Creates an offline device with the specified target
bool create(
CALtarget target //!< GPU device identifier
);
virtual cl_int createSubDevices(
device::CreateSubDevicesInfo& create_info,
cl_uint num_entries,
cl_device_id* devices,
cl_uint* num_devices) {
return CL_INVALID_VALUE;
}
//! Instantiate a new virtual device
virtual device::VirtualDevice* createVirtualDevice(
amd::CommandQueue* queue = NULL
) { return NULL; }
//! Create the device program.
virtual device::Program* createProgram(amd::option::Options* options = NULL);
//! Just returns NULL for the dummy device
virtual device::Memory* createMemory(amd::Memory& owner) const { return NULL; }
//! Sampler object allocation
virtual bool createSampler(
const amd::Sampler& owner, //!< abstraction layer sampler object
device::Sampler** sampler //!< device sampler object
) const
{
ShouldNotReachHere();
return true;
}
//! Just returns NULL for the dummy device
virtual device::Memory* createView(
amd::Memory& owner, //!< Owner memory object
const device::Memory& parent //!< Parent device memory object for the view
) const { return NULL; }
//! Reallocates the provided buffer object
virtual bool reallocMemory(amd::Memory& owner) const { return true; }
//! Acquire external graphics API object in the host thread
//! Needed for OpenGL objects on CPU device
virtual bool bindExternalDevice(
intptr_t type, void* pDevice, void* pContext, bool validateOnly) { return true; }
virtual bool unbindExternalDevice(
intptr_t type, void* pDevice, void* pContext, bool validateOnly) { return true; }
//! Releases non-blocking map target memory
virtual void freeMapTarget(amd::Memory& mem, void* target) {}
CALtarget calTarget() const { return calTarget_; }
const AMDDeviceInfo* hwInfo() const { return hwInfo_; }
//! Empty implementation on Null device
virtual bool globalFreeMemory(size_t* freeMemory) const { return false; }
//! Get GPU device settings
const gpu::Settings& settings() const
{ return reinterpret_cast<gpu::Settings&>(*settings_); }
virtual void* svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_mem_flags flags, void* svmPtr) const { return NULL; }
virtual void svmFree(void* ptr) const {return;}
protected:
CALtarget calTarget_; //!< GPU device identifier
const AMDDeviceInfo* hwInfo_; //!< Device HW info structure
//! Answer the question: "Should HSAIL Program be created?",
//! based on the given options.
bool isHsailProgram(amd::option::Options* options = NULL);
//! Fills OpenCL device info structure
void fillDeviceInfo(
const CALdeviceattribs& calAttr, //!< CAL device attributes info
const gslMemInfo& memInfo, //!< GSL mem info
size_t maxTextureSize, //!< Maximum texture size supported in HW
uint numComputeRings //!< Number of compute rings
);
};
//! Forward declarations
class Command;
class Device;
class GpuCommand;
class Heap;
class HeapBlock;
class Program;
class Kernel;
class Memory;
class Resource;
class VirtualDevice;
class PrintfDbg;
class ThreadTrace;
#ifndef CL_FILTER_NONE
#define CL_FILTER_NONE 0x1142
#endif
class Sampler : public device::Sampler
{
public:
//! Constructor
Sampler(const Device& dev): dev_(dev) {}
//! Default destructor for the device memory object
virtual ~Sampler();
//! Creates a device sampler from the OCL sampler state
bool create(
uint32_t oclSamplerState //!< OCL sampler state
);
//! Creates a device sampler from the OCL sampler state
bool create(
const amd::Sampler& owner //!< AMD sampler object
);
const void* hwState() const { return hwState_; }
private:
//! Disable default copy constructor
Sampler& operator=(const Sampler&);
//! Disable operator=
Sampler(const Sampler&);
const Device& dev_; //!< Device object associated with the sampler
address hwState_; //!< GPU HW state (\todo legacy path)
};
//! A GPU device ordinal (physical GPU device)
class Device : public NullDevice, public CALGSLDevice
{
public:
class Heap : public amd::EmbeddedObject
{
public:
//! The size of a heap element in bytes
static const size_t ElementSize = 4;
//! The type of a heap element in bytes
static const cmSurfFmt ElementType = CM_SURF_FMT_R32I;
Heap(): resource_(NULL), baseAddress_(0) {}
bool create(
Device& device //!< GPU device object
);
//! Gets the GPU resource associated with the global heap
const Memory& resource() const { return *resource_; }
//! Returns the base virtual address of the heap
uint64_t baseAddress() const { return baseAddress_; }
protected:
Memory* resource_; //!< GPU resource referencing the heap memory
uint64_t baseAddress_; //!< Virtual heap base address
};
//! Locks any access to the virtual GPUs
class ScopedLockVgpus : public amd::StackObject {
public:
//! Default constructor
ScopedLockVgpus(const Device& dev);
//! Destructor
~ScopedLockVgpus();
private:
const Device& dev_; //! Device object
};
//! Interop emulation flags
enum InteropEmulationFlags
{
D3D10Device = 0x00000001,
GLContext = 0x00000002,
};
class Engines : public amd::EmbeddedObject
{
public:
//! Default constructor
Engines() { memset(desc_, 0xff, sizeof(desc_)); }
//! Creates engine descriptor for this class
void create(uint num, gslEngineDescriptor* desc, uint maxNumComputeRings);
//! Gets engine type mask
uint getMask(gslEngineID id) const { return (1 << id); }
//! Gets a descriptor for the requested engines
uint getRequested(uint engines, gslEngineDescriptor* desc) const;
//! Returns the number of available compute rings
uint numComputeRings() const { return numComputeRings_; }
//! Returns the number of available DMA engines
uint numDMAEngines() const { return numDmaEngines_; }
private:
uint numComputeRings_;
uint numDmaEngines_;
gslEngineDescriptor desc_[GSL_ENGINEID_MAX]; //!< Engine descriptor
};
//! Transfer buffers
class XferBuffers : public amd::HeapObject
{
public:
static const size_t MaxXferBufListSize = 8;
//! Default constructor
XferBuffers(const Device& device, Resource::MemoryType type, size_t bufSize)
: type_(type)
, bufSize_(bufSize)
, acquiredCnt_(0)
, gpuDevice_(device)
{}
//! Default destructor
~XferBuffers();
//! Creates the xfer buffers object
bool create();
//! Acquires an instance of the transfer buffers
Memory& acquire();
//! Releases transfer buffer
void release(
VirtualGPU& gpu, //!< Virual GPU object used with the buffer
Memory& buffer //!< Transfer buffer for release
);
//! Returns the buffer's size for transfer
size_t bufSize() const { return bufSize_; }
private:
//! Disable copy constructor
XferBuffers(const XferBuffers&);
//! Disable assignment operator
XferBuffers& operator=(const XferBuffers&);
//! Get device object
const Device& dev() const { return gpuDevice_; }
Resource::MemoryType type_; //!< The buffer's type
size_t bufSize_; //!< Staged buffer size
std::list<Memory*> freeBuffers_; //!< The list of free buffers
amd::Atomic<uint> acquiredCnt_; //!< The total number of acquired buffers
amd::Monitor lock_; //!< Stgaed buffer acquire/release lock
const Device& gpuDevice_; //!< GPU device object
};
//! Virtual address cache entry
struct VACacheEntry : public amd::HeapObject
{
void* startAddress_; //!< Start virtual address
void* endAddress_; //!< End virtual address
Memory* memory_; //!< GPU memory, associated with the range
//! Constructor
VACacheEntry(
void* startAddress, //!< Start virtual address
void* endAddress, //!< End virtual address
Memory* memory //!< GPU memory object
): startAddress_(startAddress), endAddress_(endAddress), memory_(memory) {}
private:
//! Disable default constructor
VACacheEntry();
};
struct ScratchBuffer : public amd::HeapObject
{
uint regNum_; //!< The number of used scratch registers
Memory* memObj_; //!< Memory objects for scratch buffers
uint offset_; //!< Offset from the global scratch store
uint size_; //!< Scratch buffer size on this queue
//! Default constructor
ScratchBuffer(): regNum_(0), memObj_(NULL), offset_(0) {}
//! Default constructor
~ScratchBuffer();
//! Destroys memory objects
void destroyMemory();
};
class SrdManager : public amd::HeapObject {
public:
SrdManager(const Device& dev, uint srdSize, uint bufSize)
: dev_(dev)
, numFlags_(bufSize / (srdSize * MaskBits))
, srdSize_(srdSize)
, bufSize_(bufSize) {}
~SrdManager();
//! Allocates a new SRD slot for a resource
uint64_t allocSrdSlot(address* cpuAddr);
//! Frees a SRD slot
void freeSrdSlot(uint64_t addr);
// Fills the memory list for VidMM KMD
void fillResourceList(std::vector<const Memory*>& memList);
private:
//! Disable copy constructor
SrdManager(const SrdManager&);
//! Disable assignment operator
SrdManager& operator=(const SrdManager&);
struct Chunk {
Memory* buf_;
uint* flags_;
Chunk(): buf_(NULL), flags_(NULL) {}
};
static const uint MaskBits = 32;
const Device& dev_; //!< GPU device for the chunk manager
amd::Monitor ml_; //!< Global lock for the SRD manager
std::vector<Chunk> pool_; //!< Pool of SRD buffers
uint numFlags_; //!< Total number of flags in array
uint srdSize_; //!< SRD size
uint bufSize_; //!< Buffer size that holds SRDs
};
//! Initialise the whole GPU device subsystem (CAL init, device enumeration, etc).
static bool init();
//! Shutdown the whole GPU device subsystem (CAL shutdown).
static void tearDown();
//! Construct a new physical GPU device
Device();
//! Initialise a device (i.e. all parts of the constructor that could
//! potentially fail)
bool create(
CALuint ordinal, //!< GPU device ordinal index. Starts from 0
CALuint numOfDevices //!< number of GPU devices in the system
);
//! Destructor for the physical GPU device
virtual ~Device();
//! Instantiate a new virtual device
device::VirtualDevice* createVirtualDevice(
amd::CommandQueue* queue = NULL
);
//! Memory allocation
virtual device::Memory* createMemory(
amd::Memory& owner //!< abstraction layer memory object
) const;
//! Sampler object allocation
virtual bool createSampler(
const amd::Sampler& owner, //!< abstraction layer sampler object
device::Sampler** sampler //!< device sampler object
) const;
//! Reallocates the provided buffer object
virtual bool reallocMemory(
amd::Memory& owner //!< Buffer for reallocation
) const;
//! Allocates a view object from the device memory
virtual device::Memory* createView(
amd::Memory& owner, //!< Owner memory object
const device::Memory& parent //!< Parent device memory object for the view
) const;
//! Create the device program.
virtual device::Program* createProgram(amd::option::Options* options = NULL);
//! Attempt to bind with external graphics API's device/context
virtual bool bindExternalDevice(
intptr_t type,
void* pDevice,
void* pContext,
bool validateOnly);
//! Attempt to unbind with external graphics API's device/context
virtual bool unbindExternalDevice(
intptr_t type,
void* pDevice,
void* pContext,
bool validateOnly);
//! Validates kernel before execution
virtual bool validateKernel(
const amd::Kernel& kernel, //!< AMD kernel object
const device::VirtualDevice* vdev
);
//! Retrieves information about free memory on a GPU device
virtual bool globalFreeMemory(size_t* freeMemory) const;
//! Returns a GPU memory object from AMD memory object
gpu::Memory* getGpuMemory(
amd::Memory* mem //!< Pointer to AMD memory object
) const;
//! Gets the GPU resource associated with the global heap
const Memory& globalMem() const { return heap_.resource(); }
//! Gets the device context object
amd::Context& context() const { return *context_; }
//! Gets the global heap object
const Heap& heap() const { return heap_; }
//! Gets the memory object for the dummy page
amd::Memory* dummyPage() const { return dummyPage_; }
amd::Monitor& lockAsyncOps() const { return *lockAsyncOps_; }
//! Returns the lock object for the virtual gpus list
amd::Monitor* vgpusAccess() const { return vgpusAccess_; }
//! Returns the number of virtual GPUs allocated on this device
uint numOfVgpus() const { return numOfVgpus_; }
uint numOfVgpus_; //!< The number of virtual GPUs (lock protected)
typedef std::vector<VirtualGPU*> VirtualGPUs;
//! Returns the list of all virtual GPUs running on this device
const VirtualGPUs vgpus() const { return vgpus_; }
VirtualGPUs vgpus_; //!< The list of all running virtual gpus (lock protected)
//! Scratch buffer allocation
gpu::Memory* createScratchBuffer(
size_t size //!< Size of buffer
) const;
//! Returns transfer buffer object
XferBuffers& xferWrite() const { return *xferWrite_; }
//! Returns transfer buffer object
XferBuffers& xferRead() const { return *xferRead_; }
//! Adds GPU memory to the VA cache list
void addVACache(Memory* memory) const;
//! Removes GPU memory from the VA cache list
void removeVACache(const Memory* memory) const;
//! Finds GPU memory from virtual address
Memory* findMemoryFromVA(const void* ptr, size_t* offset) const;
//! Finds an appropriate map target
amd::Memory* findMapTarget(size_t size) const;
//! Adds a map target to the cache
bool addMapTarget(amd::Memory* memory) const;
//! Returns resource cache object
ResourceCache& resourceCache() const { return *resourceCache_; }
//! Returns engines object
const Engines& engines() const { return engines_; }
//! Returns engines object
const device::BlitManager& xferMgr() const;
VirtualGPU* xferQueue() const { return xferQueue_; }
//! Retrieves the internal format from the OCL format
CalFormat getCalFormat(
const amd::Image::Format& format //! OCL image format
) const;
//! Retrieves the OCL format from the internal image format
amd::Image::Format getOclFormat(
const CalFormat& format //! Internal image format
) const;
const ScratchBuffer* scratch(uint idx) const { return scratch_[idx]; }
//! Returns the global scratch buffer
Memory* globalScratchBuf() const { return globalScratchBuf_; };
//! Destroys scratch buffer memory
void destroyScratchBuffers();
//! Initialize heap resources if uninitialized
bool initializeHeapResources();
//! Set GSL sampler to the specified state
void fillHwSampler(
uint32_t state, //!< Sampler's OpenCL state
void* hwState, //!< Sampler's HW state
uint32_t hwStateSize, //!< Size of sampler's HW state
uint32_t mipFilter = CL_FILTER_NONE, //!< Mip filter
float minLod = 0.f, //!< Min level of detail
float maxLod = CL_MAXFLOAT //!< Max level of detail
) const;
//! host memory alloc
virtual void* hostAlloc(size_t size, size_t alignment, bool atomics = false) const;
//! SVM allocation
virtual void* svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_mem_flags flags, void* svmPtr) const;
//! Free host SVM memory
void hostFree(void* ptr, size_t size) const;
//! SVM free
virtual void svmFree(void* ptr) const;
//! Returns SRD manger object
SrdManager& srds() const { return *srdManager_; }
//! Initial the Hardware Debug Manager
cl_int hwDebugManagerInit(amd::Context *context, uintptr_t messageStorage);
private:
//! Disable copy constructor
Device(const Device&);
//! Disable assignment
Device& operator=(const Device&);
//! Sends the stall command to all queues
bool stallQueues();
//! Buffer allocation
gpu::Memory* createBuffer(
amd::Memory& owner, //!< Abstraction layer memory object
bool directAccess //!< Use direct host memory access
) const;
//! Image allocation
gpu::Memory* createImage(
amd::Memory& owner, //!< Abstraction layer memory object
bool directAccess //!< Use direct host memory access
) const;
//! Allocates/reallocates the scratch buffer, according to the usage
bool allocScratch(
uint regNum, //!< Number of the scratch registers
const VirtualGPU* vgpu //!< Virtual GPU for the allocation
);
amd::Context* context_; //!< A dummy context for internal allocations
Heap heap_; //!< GPU global heap
amd::Memory* dummyPage_; //!< A dummy page for NULL pointer
amd::Monitor* lockAsyncOps_; //!< Lock to serialise all async ops on this device
amd::Monitor* lockAsyncOpsForInitHeap_; //!< Lock to serialise all async ops on initialization heap operation
amd::Monitor* vgpusAccess_; //!< Lock to serialise virtual gpu list access
amd::Monitor* scratchAlloc_; //!< Lock to serialise scratch allocation
amd::Monitor* mapCacheOps_; //!< Lock to serialise cache for the map resources
XferBuffers* xferRead_; //!< Transfer buffers read
XferBuffers* xferWrite_; //!< Transfer buffers write
amd::Monitor* vaCacheAccess_; //!< Lock to serialize VA caching access
std::list<VACacheEntry*>* vaCacheList_; //!< VA cache list
std::vector<amd::Memory*>* mapCache_; //!< Map cache info structure
ResourceCache* resourceCache_; //!< Resource cache
Engines engines_; //!< Available engines on device
bool heapInitComplete_; //!< Keep track of initialization status of heap resources
VirtualGPU* xferQueue_; //!< Transfer queue
std::vector<ScratchBuffer*> scratch_; //!< Scratch buffers for kernels
Memory* globalScratchBuf_; //!< Global scratch buffer
SrdManager* srdManager_; //!< SRD manager object
static AppProfile appProfile_; //!< application profile
};
/*@}*/} // namespace gpu
#endif /*GPU_HPP_*/