Dosyalar
rocm-systems/rocclr/device/gpu/gpudevice.hpp
T
Laurent Morichetti d9d9c69399 Replace cl_* integral types with standard types.
cl_bool -> bool
cl_int -> int32_t
cl_uint -> uint32_t
cl_long -> int64_t
cl_ulong -> uint64_t
cl_float -> float
cl_double -> double
cl_bitfield -> uint64_t

Change-Id: I840c8993b55f98f5b745d21e27f5f28233647a58
2020-02-12 13:16:06 -08:00

597 satır
21 KiB
C++

/* Copyright (c) 2009-present Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef GPU_HPP_
#define GPU_HPP_
#include "top.hpp"
#include "device/device.hpp"
#include "platform/command.hpp"
#include "platform/program.hpp"
#include "platform/perfctr.hpp"
#include "platform/threadtrace.hpp"
#include "platform/memory.hpp"
#include "utils/concurrent.hpp"
#include "thread/thread.hpp"
#include "thread/monitor.hpp"
#include "device/gpu/gpuvirtual.hpp"
#include "device/gpu/gpumemory.hpp"
#include "device/gpu/gpudefs.hpp"
#include "device/gpu/gpusettings.hpp"
#include "device/gpu/gpuappprofile.hpp"
#include "acl.h"
#include "vaminterface.h"
/*! \addtogroup GPU
* @{
*/
//! GPU Device Implementation
namespace gpu {
//! A nil device object
class NullDevice : public amd::Device {
protected:
static aclCompiler* compiler_;
static aclCompiler* hsaCompiler_;
public:
aclCompiler* amdilCompiler() const { return compiler_; }
aclCompiler* hsaCompiler() const { return hsaCompiler_; }
aclCompiler* compiler() const { return hsaCompiler_; }
Compiler* binCompiler() const { return amdilCompiler(); }
static bool init(void);
//! Construct a new identifier
NullDevice();
//! Creates an offline device with the specified target
bool create(CALtarget target //!< GPU device identifier
);
//! Instantiate a new virtual device
virtual device::VirtualDevice* createVirtualDevice(amd::CommandQueue* queue = NULL) {
return NULL;
}
//! Create the device program.
virtual device::Program* createProgram(amd::Program& owner, amd::option::Options* options = NULL);
//! Just returns NULL for the dummy device
virtual device::Memory* createMemory(amd::Memory& owner) const { return NULL; }
//! Sampler object allocation
virtual bool createSampler(const amd::Sampler& owner, //!< abstraction layer sampler object
device::Sampler** sampler //!< device sampler object
) const {
ShouldNotReachHere();
return true;
}
//! Just returns NULL for the dummy device
virtual device::Memory* createView(
amd::Memory& owner, //!< Owner memory object
const device::Memory& parent //!< Parent device memory object for the view
) const {
return NULL;
}
//! Acquire external graphics API object in the host thread
//! Needed for OpenGL objects on CPU device
virtual bool bindExternalDevice(uint flags, void* const pDevice[], void* pContext,
bool validateOnly) {
return true;
}
virtual bool unbindExternalDevice(uint flags, void* const pDevice[], void* pContext,
bool validateOnly) {
return true;
}
//! Releases non-blocking map target memory
virtual void freeMapTarget(amd::Memory& mem, void* target) {}
CALtarget calTarget() const { return calTarget_; }
const AMDDeviceInfo* hwInfo() const { return hwInfo_; }
//! Empty implementation on Null device
virtual bool globalFreeMemory(size_t* freeMemory) const { return false; }
//! Get GPU device settings
const gpu::Settings& settings() const { return reinterpret_cast<gpu::Settings&>(*settings_); }
virtual void* svmAlloc(amd::Context& context, size_t size, size_t alignment,
cl_svm_mem_flags flags, void* svmPtr) const {
return NULL;
}
virtual void svmFree(void* ptr) const { return; }
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; }
protected:
bool usePal() const {
return (calTarget_ == CAL_TARGET_GREENLAND || calTarget_ == CAL_TARGET_RAVEN ||
calTarget_ == CAL_TARGET_RAVEN2 || calTarget_ == CAL_TARGET_RENOIR ||
calTarget_ >= CAL_TARGET_VEGA12);
}
//! Answer the question: "Should HSAIL Program be created?",
//! based on the given options.
bool isHsailProgram(amd::option::Options* options = NULL);
//! Fills OpenCL device info structure
void fillDeviceInfo(const CALdeviceattribs& calAttr, //!< CAL device attributes info
const gslMemInfo& memInfo, //!< GSL mem info
size_t maxTextureSize, //!< Maximum texture size supported in HW
uint numComputeRings, //!< Number of compute rings
uint numComputeRingsRT //!< Number of RT compute rings
);
CALtarget calTarget_; //!< GPU device identifier
const AMDDeviceInfo* hwInfo_; //!< Device HW info structure
};
//! Forward declarations
class Command;
class Device;
class GpuCommand;
class Heap;
class HeapBlock;
class Program;
class Kernel;
class Memory;
class Resource;
class VirtualDevice;
class PrintfDbg;
class ThreadTrace;
#ifndef CL_FILTER_NONE
#define CL_FILTER_NONE 0x1142
#endif
class Sampler : public device::Sampler {
public:
//! Constructor
Sampler(const Device& dev) : dev_(dev) {}
//! Default destructor for the device memory object
virtual ~Sampler();
//! Creates a device sampler from the OCL sampler state
bool create(uint32_t oclSamplerState //!< OCL sampler state
);
//! Creates a device sampler from the OCL sampler state
bool create(const amd::Sampler& owner //!< AMD sampler object
);
const void* hwState() const { return hwState_; }
private:
//! Disable default copy constructor
Sampler& operator=(const Sampler&);
//! Disable operator=
Sampler(const Sampler&);
const Device& dev_; //!< Device object associated with the sampler
address hwState_; //!< GPU HW state (\todo legacy path)
};
//! A GPU device ordinal (physical GPU device)
class Device : public NullDevice, public CALGSLDevice {
public:
class Heap : public amd::EmbeddedObject {
public:
//! The size of a heap element in bytes
static const size_t ElementSize = 4;
//! The type of a heap element in bytes
static const cmSurfFmt ElementType = CM_SURF_FMT_R32I;
Heap() : resource_(NULL), baseAddress_(0) {}
bool create(Device& device //!< GPU device object
);
//! Gets the GPU resource associated with the global heap
const Memory& resource() const { return *resource_; }
//! Returns the base virtual address of the heap
uint64_t baseAddress() const { return baseAddress_; }
protected:
Memory* resource_; //!< GPU resource referencing the heap memory
uint64_t baseAddress_; //!< Virtual heap base address
};
//! Locks any access to the virtual GPUs
class ScopedLockVgpus : public amd::StackObject {
public:
//! Default constructor
ScopedLockVgpus(const Device& dev);
//! Destructor
~ScopedLockVgpus();
private:
const Device& dev_; //! Device object
};
//! Interop emulation flags
enum InteropEmulationFlags {
D3D10Device = 0x00000001,
GLContext = 0x00000002,
};
class Engines : public amd::EmbeddedObject {
public:
//! Default constructor
Engines() : numComputeRings_(0), numComputeRingsRT_(0), numDmaEngines_(0) {
memset(desc_, 0xff, sizeof(desc_));
}
//! Creates engine descriptor for this class
void create(uint num, gslEngineDescriptor* desc, uint maxNumComputeRings);
//! Gets engine type mask
uint getMask(gslEngineID id) const { return (1 << id); }
//! Gets a descriptor for the requested engines
uint getRequested(uint engines, gslEngineDescriptor* desc) const;
//! Returns the number of available compute rings
uint numComputeRings() const { return numComputeRings_; }
//! Returns the number of available real time compute rings
uint numComputeRingsRT() const { return numComputeRingsRT_; }
//! Returns the number of available DMA engines
uint numDMAEngines() const { return numDmaEngines_; }
private:
uint numComputeRings_;
uint numComputeRingsRT_;
uint numDmaEngines_;
gslEngineDescriptor desc_[GSL_ENGINEID_MAX]; //!< Engine descriptor
};
//! Transfer buffers
class XferBuffers : public amd::HeapObject {
public:
static const size_t MaxXferBufListSize = 8;
//! Default constructor
XferBuffers(const Device& device, Resource::MemoryType type, size_t bufSize)
: type_(type), bufSize_(bufSize), acquiredCnt_(0), gpuDevice_(device) {}
//! Default destructor
~XferBuffers();
//! Creates the xfer buffers object
bool create();
//! Acquires an instance of the transfer buffers
Memory& acquire();
//! Releases transfer buffer
void release(VirtualGPU& gpu, //!< Virual GPU object used with the buffer
Memory& buffer //!< Transfer buffer for release
);
//! Returns the buffer's size for transfer
size_t bufSize() const { return bufSize_; }
private:
//! Disable copy constructor
XferBuffers(const XferBuffers&);
//! Disable assignment operator
XferBuffers& operator=(const XferBuffers&);
//! Get device object
const Device& dev() const { return gpuDevice_; }
Resource::MemoryType type_; //!< The buffer's type
size_t bufSize_; //!< Staged buffer size
std::list<Memory*> freeBuffers_; //!< The list of free buffers
amd::Atomic<uint> acquiredCnt_; //!< The total number of acquired buffers
amd::Monitor lock_; //!< Staged buffer acquire/release lock
const Device& gpuDevice_; //!< GPU device object
};
struct ScratchBuffer : public amd::HeapObject {
uint regNum_; //!< The number of used scratch registers
Memory* memObj_; //!< Memory objects for scratch buffers
uint64_t offset_; //!< Offset from the global scratch store
uint64_t size_; //!< Scratch buffer size on this queue
//! Default constructor
ScratchBuffer() : regNum_(0), memObj_(NULL), offset_(0), size_(0) {}
//! Default constructor
~ScratchBuffer();
//! Destroys memory objects
void destroyMemory();
};
class SrdManager : public amd::HeapObject {
public:
SrdManager(const Device& dev, uint srdSize, uint bufSize)
: dev_(dev),
numFlags_(bufSize / (srdSize * MaskBits)),
srdSize_(srdSize),
bufSize_(bufSize) {}
~SrdManager();
//! Allocates a new SRD slot for a resource
uint64_t allocSrdSlot(address* cpuAddr);
//! Frees a SRD slot
void freeSrdSlot(uint64_t addr);
// Fills the memory list for VidMM KMD
void fillResourceList(std::vector<const Memory*>& memList);
private:
//! Disable copy constructor
SrdManager(const SrdManager&);
//! Disable assignment operator
SrdManager& operator=(const SrdManager&);
struct Chunk {
Memory* buf_;
uint* flags_;
Chunk() : buf_(NULL), flags_(NULL) {}
};
static const uint MaskBits = 32;
const Device& dev_; //!< GPU device for the chunk manager
amd::Monitor ml_; //!< Global lock for the SRD manager
std::vector<Chunk> pool_; //!< Pool of SRD buffers
uint numFlags_; //!< Total number of flags in array
uint srdSize_; //!< SRD size
uint bufSize_; //!< Buffer size that holds SRDs
};
//! Initialise the whole GPU device subsystem (CAL init, device enumeration, etc).
static bool init();
//! Shutdown the whole GPU device subsystem (CAL shutdown).
static void tearDown();
//! Construct a new physical GPU device
Device();
//! Initialise a device (i.e. all parts of the constructor that could
//! potentially fail)
bool create(CALuint ordinal, //!< GPU device ordinal index. Starts from 0
CALuint numOfDevices //!< number of GPU devices in the system
);
//! Destructor for the physical GPU device
virtual ~Device();
//! Instantiate a new virtual device
device::VirtualDevice* createVirtualDevice(amd::CommandQueue* queue = NULL);
//! Memory allocation
virtual device::Memory* createMemory(amd::Memory& owner //!< abstraction layer memory object
) const;
//! Sampler object allocation
virtual bool createSampler(const amd::Sampler& owner, //!< abstraction layer sampler object
device::Sampler** sampler //!< device sampler object
) const;
//! Allocates a view object from the device memory
virtual device::Memory* createView(
amd::Memory& owner, //!< Owner memory object
const device::Memory& parent //!< Parent device memory object for the view
) const;
//! Create the device program.
virtual device::Program* createProgram(amd::Program& owner, amd::option::Options* options = NULL);
//! Attempt to bind with external graphics API's device/context
virtual bool bindExternalDevice(uint flags, void* const pDevice[], void* pContext,
bool validateOnly);
//! Attempt to unbind with external graphics API's device/context
virtual bool unbindExternalDevice(uint flags, void* const pDevice[], void* pContext,
bool validateOnly);
//! Validates kernel before execution
virtual bool validateKernel(const amd::Kernel& kernel, //!< AMD kernel object
const device::VirtualDevice* vdev,
bool coop_groups = false);
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput);
//! Retrieves information about free memory on a GPU device
virtual bool globalFreeMemory(size_t* freeMemory) const;
//! Returns a GPU memory object from AMD memory object
gpu::Memory* getGpuMemory(amd::Memory* mem //!< Pointer to AMD memory object
) const;
//! Gets the GPU resource associated with the global heap
const Memory& globalMem() const { return heap_.resource(); }
//! Gets the device context object
amd::Context& context() const { return *context_; }
//! Gets the global heap object
const Heap& heap() const { return heap_; }
//! Gets the memory object for the dummy page
amd::Memory* dummyPage() const { return dummyPage_; }
amd::Monitor& lockAsyncOps() const { return *lockAsyncOps_; }
//! Returns the lock object for the virtual gpus list
amd::Monitor* vgpusAccess() const { return vgpusAccess_; }
//! Returns the number of virtual GPUs allocated on this device
uint numOfVgpus() const { return numOfVgpus_; }
uint numOfVgpus_; //!< The number of virtual GPUs (lock protected)
typedef std::vector<VirtualGPU*> VirtualGPUs;
//! Returns the list of all virtual GPUs running on this device
const VirtualGPUs& vgpus() const { return vgpus_; }
VirtualGPUs vgpus_; //!< The list of all running virtual gpus (lock protected)
//! Scratch buffer allocation
gpu::Memory* createScratchBuffer(size_t size //!< Size of buffer
) const;
//! Returns transfer buffer object
XferBuffers& xferWrite() const { return *xferWrite_; }
//! Returns transfer buffer object
XferBuffers& xferRead() const { return *xferRead_; }
//! Finds an appropriate map target
amd::Memory* findMapTarget(size_t size) const;
//! Adds a map target to the cache
bool addMapTarget(amd::Memory* memory) const;
//! Returns resource cache object
ResourceCache& resourceCache() const { return *resourceCache_; }
//! Returns engines object
const Engines& engines() const { return engines_; }
//! Returns engines object
const device::BlitManager& xferMgr() const;
VirtualGPU* xferQueue() const { return xferQueue_; }
//! Retrieves the internal format from the OCL format
CalFormat getCalFormat(const amd::Image::Format& format //! OCL image format
) const;
//! Retrieves the OCL format from the internal image format
amd::Image::Format getOclFormat(const CalFormat& format //! Internal image format
) const;
const ScratchBuffer* scratch(uint idx) const { return scratch_[idx]; }
//! Returns the global scratch buffer
Memory* globalScratchBuf() const { return globalScratchBuf_; };
//! Destroys scratch buffer memory
void destroyScratchBuffers();
//! Initialize heap resources if uninitialized
bool initializeHeapResources();
//! Set GSL sampler to the specified state
void fillHwSampler(uint32_t state, //!< Sampler's OpenCL state
void* hwState, //!< Sampler's HW state
uint32_t hwStateSize, //!< Size of sampler's HW state
uint32_t mipFilter = CL_FILTER_NONE, //!< Mip filter
float minLod = 0.f, //!< Min level of detail
float maxLod = CL_MAXFLOAT //!< Max level of detail
) const;
//! host memory alloc
virtual void* hostAlloc(size_t size, size_t alignment, bool atomics = false) const;
//! SVM allocation
virtual void* svmAlloc(amd::Context& context, size_t size, size_t alignment,
cl_svm_mem_flags flags, void* svmPtr) const;
//! Free host SVM memory
void hostFree(void* ptr, size_t size) const;
//! SVM free
virtual void svmFree(void* ptr) const;
//! Returns SRD manger object
SrdManager& srds() const { return *srdManager_; }
//! Initial the Hardware Debug Manager
int32_t hwDebugManagerInit(amd::Context* context, uintptr_t messageStorage);
private:
//! Disable copy constructor
Device(const Device&);
//! Disable assignment
Device& operator=(const Device&);
//! Sends the stall command to all queues
bool stallQueues();
//! Buffer allocation
gpu::Memory* createBuffer(amd::Memory& owner, //!< Abstraction layer memory object
bool directAccess //!< Use direct host memory access
) const;
//! Image allocation
gpu::Memory* createImage(amd::Memory& owner, //!< Abstraction layer memory object
bool directAccess //!< Use direct host memory access
) const;
//! Allocates/reallocates the scratch buffer, according to the usage
bool allocScratch(uint regNum, //!< Number of the scratch registers
const VirtualGPU* vgpu //!< Virtual GPU for the allocation
);
amd::Context* context_; //!< A dummy context for internal allocations
Heap heap_; //!< GPU global heap
amd::Memory* dummyPage_; //!< A dummy page for NULL pointer
amd::Monitor* lockAsyncOps_; //!< Lock to serialise all async ops on this device
amd::Monitor* lockAsyncOpsForInitHeap_; //!< Lock to serialise all async ops on initialization
//!heap operation
amd::Monitor* vgpusAccess_; //!< Lock to serialise virtual gpu list access
amd::Monitor* scratchAlloc_; //!< Lock to serialise scratch allocation
amd::Monitor* mapCacheOps_; //!< Lock to serialise cache for the map resources
XferBuffers* xferRead_; //!< Transfer buffers read
XferBuffers* xferWrite_; //!< Transfer buffers write
std::vector<amd::Memory*>* mapCache_; //!< Map cache info structure
ResourceCache* resourceCache_; //!< Resource cache
Engines engines_; //!< Available engines on device
bool heapInitComplete_; //!< Keep track of initialization status of heap resources
VirtualGPU* xferQueue_; //!< Transfer queue
std::vector<ScratchBuffer*> scratch_; //!< Scratch buffers for kernels
Memory* globalScratchBuf_; //!< Global scratch buffer
SrdManager* srdManager_; //!< SRD manager object
static AppProfile appProfile_; //!< application profile
};
/*@}*/} // namespace gpu
#endif /*GPU_HPP_*/