// // Copyright 2010 Advanced Micro Devices, Inc. All rights reserved. // #ifndef MEMORY_H_ #define MEMORY_H_ #include "top.hpp" #include "utils/flags.hpp" #include "thread/monitor.hpp" #include "platform/context.hpp" #include "platform/object.hpp" #include "platform/interop.hpp" #include "device/device.hpp" #include #include #include #include #include namespace device { class Memory; class VirtualDevice; } namespace amd { // Forward declaration of the amd::Image and amd::Buffer classes. class Image; class Buffer; class Pipe; struct BufferRect : public amd::EmbeddedObject { //! Default constructor BufferRect() : rowPitch_(0) , slicePitch_(0) , start_(0) , end_(0) { } //! Creates BufferRect object bool create( const size_t* bufferOrigin, //!< Start locaiton in the buffer const size_t* region, //!< Copy region size_t bufferRowPitch, //!< Provided buffer's row pitch size_t bufferSlicePitch //!< Provided buffer's slice pitch ); //! Returns the plain offset for the (X, Y, Z) location size_t offset( size_t x, //!< Coordinate in X dimension size_t y, //!< Coordinate in Y dimension size_t z //!< Coordinate in Z dimension ) const { return start_ + x + y * rowPitch_ + z * slicePitch_; } size_t rowPitch_; //!< Calculated row pitch for the buffer rect size_t slicePitch_; //!< Calculated slice pitch for the buffer rect size_t start_; //!< Start offset for the copy region size_t end_; //!< Relative end offset from start for the copy region }; class HostMemoryReference { public: //! Default constructor HostMemoryReference(void* hostMem = NULL) : alloced_(false) , hostMem_(hostMem) , size_(0) {} //! Default destructor ~HostMemoryReference() { assert(!alloced_ && "Host buffer not deallocated"); } //! Creates host memory reference object bool allocateMemory(size_t size, const Context& context); // Frees system memory if it was allocated void deallocateMemory(const Context& context); //! Get the host memory pointer void* hostMem() const { return hostMem_; } //! Get the host memory size size_t size() const { return size_; } //! Set the host memory pointer void setHostMem(void* hostMem, const Context& context) { deallocateMemory(context); hostMem_ = hostMem; } //! Returns true if the host memory has been allocated by this object, false // if it has been allocated elsewhere. bool alloced() const { return alloced_; } private: //! Disable copy constructor HostMemoryReference(const HostMemoryReference&); //! Disable operator= HostMemoryReference& operator=(const HostMemoryReference&); bool alloced_; //!< TRUE if memory was allocated void* hostMem_; //!< Host memory pointer size_t size_; //!< The host memory size }; class Memory: public amd::RuntimeObject { typedef void (CL_CALLBACK * DestructorCallBackFunction)( cl_mem memobj, void *user_data); enum AllocState { AllocInit = 0, AllocCreate = 1, AllocComplete = 2, AllocRealloced = 3 }; struct DestructorCallBackEntry { struct DestructorCallBackEntry* next_; DestructorCallBackFunction callback_; void* data_; DestructorCallBackEntry( DestructorCallBackFunction callback, void* data) : callback_(callback), data_(data) { } }; protected: typedef cl_mem_object_type Type; typedef cl_mem_flags Flags; typedef DeviceMap DeviceMemory; size_t numDevices_; //!< Number of devices //! The device memory objects included in this memory DeviceMemory* deviceMemories_; //! The device alloced state std::map deviceAlloced_; //! Linked list of destructor callbacks. std::atomic destructorCallbacks_; SharedReference context_; //!< Owning context Memory* parent_; const Type type_; //!< Object type (Buffer, Image2D, Image3D) HostMemoryReference hostMemRef_; //!< Host-side memory reference(or NULL if none) size_t origin_; size_t size_; //!< Size in bytes Flags flags_; //!< Construction flags size_t version_; //!< Update count, used for coherency const Device* lastWriter_; //!< Which device wrote most recently (NULL if host) InteropObject* interopObj_; //!< Interop object bool isParent_; //!< This object is a parent device::VirtualDevice* vDev_; //!< Memory object belongs to a virtual device only bool forceSysMemAlloc_; //!< Forces system memory allocation std::atomic_uint mapCount_; //!< Keep track of number of mappings for a memory object void * svmHostAddress_; //!< svm host address; bool svmPtrCommited_; //!< svm host address committed flag; bool canBeCached_; //!< flag to if the object can be cached; private: //! Disable default assignment operator Memory& operator=(const Memory&); //! Disable default copy operator Memory(const Memory&); Monitor lockMemoryOps_; //!< Lock to serialize memory operations std::list subBuffers_; //!< List of all subbuffers for this memory object protected: //! The constructor creates a memory object but does not allocate either host memory //! or device memory. Default parameters are appropriate for Buffer creation. Memory( Context& context, //!< Context object Type type, //!< Memory type Flags flags, //!< Object's flags size_t size, //!< Memory size void* svmPtr = NULL //!< svm host memory address, NULL if no SVM mem object ); Memory( Memory& parent, //!< Context object Flags flags, //!< Object's flags size_t offset, //!< Memory offset size_t size, //!< Memory size Type type = 0 //!< Memory type ); //! Memory object destructor virtual ~Memory(); //! Copies initialization data to the backing store virtual void copyToBackingStore( void* initFrom //!< Pointer to the initialization memory ); //! Initializes the device memory array virtual void initDeviceMemory(); void setSize(size_t size) { size_ = size; } void setInteropObj(InteropObject* obj) { interopObj_ = obj; } public: //! Placement new operator. void* operator new( size_t size, //!< Original allocation size const Context& context //!< Context this memory object is allocated in. ); // Provide a "matching" placement delete operator. void operator delete( void*, //!< Pointer to deallocate const Context& context //!< Context this memory object is allocated in. ); // and a regular delete operator to satisfy synthesized methods. void operator delete( void* //!< Pointer to deallocate ); //! Returns the memory lock object amd::Monitor& lockMemoryOps() { return lockMemoryOps_; } //! Adds a view into the list void addSubBuffer(Memory* item); //! virtual function used to distinguish memory objects from other CL objects virtual ObjectType objectType() const {return ObjectTypeMemory;} //! Removes a subbuffer from the list void removeSubBuffer(Memory* item); //! Returns the list of all subbuffers std::list& subBuffers() { return subBuffers_; } //! Returns the number of devices size_t numDevices() const { return numDevices_; } //! static_cast to Buffer with sanity check virtual Buffer* asBuffer() { return NULL; } //! static_cast to Image with sanity check virtual Image* asImage() { return NULL; } //! static_cast to Pipe with sanity check virtual Pipe* asPipe() { return NULL; } //! Creates and initializes device (cache) memory for all devices virtual bool create( void* initFrom = NULL, //!< Pointer to the initialization data bool sysMemAlloc = false //!< Allocate device memory in system memory ); //! Allocates device (cache) memory for a specific device bool addDeviceMemory( const Device* dev //!< Device object ); //! Replaces device (cache) memory for a specific device void replaceDeviceMemory( const Device* dev, //!< Device object device::Memory* dm //!< New device memory object for replacement ); //! Find the section for the given device. Return NULL if not found. device::Memory* getDeviceMemory( const Device& dev, //!< Device object bool alloc = true //!< Allocates memory ); //! Allocate host memory (as required) bool allocHostMemory( void* initFrom, //!< Host memory provided by the application bool allocHostMem, //!< Force system memory allocation bool forceCopy = false //!< Force system memory allocation ); //! Checks if memory was reallocated bool reallocedDeviceMemory(const Device* dev) { return (AllocRealloced == deviceAlloced_[dev]) ? true : false; } // Accessors Memory* parent() const { return parent_; } bool isParent() const { return isParent_; } size_t getOrigin() const { return origin_; } size_t getSize() const { return size_; } Flags getMemFlags() const { return flags_; } Type getType() const { return type_; } const Device* getLastWriter() { return lastWriter_; } const HostMemoryReference* getHostMemRef() const { return &hostMemRef_; } void* getHostMem() const { return hostMemRef_.hostMem(); } void setHostMem(void* mem) { hostMemRef_.setHostMem(mem, context_()); } size_t getVersion() const { return version_; } Context& getContext() const { return context_(); } bool isInterop() const { return (getInteropObj() != NULL) ? true : false ; } InteropObject* getInteropObj() const { return interopObj_; } bool setDestructorCallback(DestructorCallBackFunction callback, void* data); //! Signal that a write has occurred to a cached version void signalWrite(const Device* writer); //! Force an asynchronous writeback from the most-recent dirty cache to host void cacheWriteBack(void); //! For CPU device only! //! Base functions for mapping/unmapping GL/D3D objects //! Functions may be left empty, if not needed //! Virtual member function mapExtObjectInCQThread() maps a GL object //! and store CPU memory pointer in Memory::hostMem_. //! Returns true if ok, false 0 if error(s) virtual bool mapExtObjectInCQThread(void) { return true;} //! Virtual member functions unmapExtObjectInCQThread() unmaps a GL object //! and clears pointer Memory::hostMem_. //! Returns true if ok, false 0 if error(s) virtual bool unmapExtObjectInCQThread(void) { return true; } //! Returns true if the specified area covers memory intirely virtual bool isEntirelyCovered( const Coord3D& origin, //!< Origin location of the covered region const Coord3D& region //!< Covered region dimensions ) const = 0; //! Returns true if the specified area is not degenerate and is inside of allocated memory virtual bool validateRegion( const Coord3D& origin, //!< Origin location of the covered region const Coord3D& region //!< Covered region dimensions ) const = 0; void setVirtualDevice(device::VirtualDevice* vDev) { vDev_ = vDev; } device::VirtualDevice* getVirtualDevice() const { return vDev_; } bool forceSysMemAlloc() const { return forceSysMemAlloc_; } void incMapCount() { ++mapCount_; } void decMapCount() { --mapCount_; } uint mapCount() const { return mapCount_; } bool usesSvmPointer() const; void * getSvmPtr() const { return svmHostAddress_; } //!< svm pointer accessor; void setSvmPtr(void * ptr) { svmHostAddress_ = ptr; } //!< svm pointer setter; bool isSvmPtrCommited() const { return svmPtrCommited_; } //!< svm host address committed accessor; void commitSvmMemory(); //!< svm host address committed accessor; void setCacheStatus(bool canBeCached) { canBeCached_ = canBeCached; }//!< set the memobject cached status; bool canBeCached() const { return canBeCached_; } //!< get the memobject cached status; }; //! Buffers are a specialization of memory. Just a wrapper, really, //! but this gives us flexibility for later changes. class Buffer: public Memory { protected: cl_bus_address_amd busAddress_; //! Initializes the device memory array which is nested // after'Image1DD3D10' object in memory layout. virtual void initDeviceMemory(); Buffer(Context& context, Type type, Flags flags, size_t size) : Memory(context, type, flags, size) { } public: Buffer(Context& context, Flags flags, size_t size, void* svmPtr = NULL) : Memory(context, CL_MEM_OBJECT_BUFFER, flags, size, svmPtr) { } Buffer(Memory& parent, Flags flags, size_t origin, size_t size) : Memory(parent, flags, origin, size) { } bool create( void* initFrom = NULL, //!< Pointer to the initialization data bool sysMemAlloc = false //!< Allocate device memory in system memory ); //! static_cast to Buffer with sanity check virtual Buffer* asBuffer() { return this; } //! Returns true if the specified area covers buffer entirely bool isEntirelyCovered( const Coord3D& origin, //!< Origin location of the covered region const Coord3D& region //!< Covered region dimensions ) const; //! Returns true if the specified area is not degenerate and is inside of allocated memory bool validateRegion( const Coord3D& origin, //!< Origin location of the covered region const Coord3D& region //!< Covered region dimensions ) const; cl_bus_address_amd busAddress() const { return busAddress_; } }; //! Pipes are a specialization of Buffers. class Pipe: public Buffer { protected: size_t packetSize_; //!< Size in bytes of pipe packet size_t maxPackets_; //!< Number of max pipe packets bool initialized_; //!< Mark if the pipe is initialized virtual void initDeviceMemory(); public: Pipe(Context& context, Flags flags, size_t size, size_t pipe_packet_size, size_t pipe_max_packets) : Buffer(context, CL_MEM_OBJECT_PIPE, flags, size) , initialized_(false) { packetSize_ = pipe_packet_size; maxPackets_ = pipe_max_packets; } //! static_cast to Pipe with sanity check virtual Pipe* asPipe() { return this; } //! Returns pipe size pitch in bytes size_t getPacketSize() const { return packetSize_; } //! return max number of pipe packets size_t getMaxNumPackets() const { return maxPackets_; } }; //! Images are a specialization of memory class Image : public Memory { public: // declaration of list of supported formats static cl_image_format supportedFormats[]; static cl_image_format supportedFormatsRA[]; static cl_image_format supportedDepthStencilFormats[]; static cl_uint numSupportedFormats(const Context& context, cl_mem_object_type image_type, cl_mem_flags flags = 0); static cl_uint getSupportedFormats( const Context& context, cl_mem_object_type image_type, const cl_uint num_entries, cl_image_format *image_formats, cl_mem_flags flags = 0); //! Helper struct to manipulate image formats. struct Format : public cl_image_format { //! Construct a new ImageFormat wrapper. Format(const cl_image_format& format) { image_channel_order = format.image_channel_order; image_channel_data_type = format.image_channel_data_type; } //! Return true if this is a valid image format, false otherwise. bool isValid() const; //! Returns true if this format is supported by runtime, false otherwise bool isSupported(const Context& context, cl_mem_object_type image_type = 0, cl_mem_flags flags = 0) const; //! Compare 2 image formats. bool operator == (const Format& rhs) const { return image_channel_order == rhs.image_channel_order && image_channel_data_type == rhs.image_channel_data_type; } bool operator != (const Format& rhs) const { return !(*this == rhs); } //! Return the number of channels. size_t getNumChannels() const; //! Return the element size in bytes. size_t getElementSize() const; //! Get the channel order by indices. R = 0, G = 1, B = 2, A = 3. void getChannelOrder(uint8_t* channelOrder) const; //! Adjust colorRGBA according to format, and set it in colorFormat. void formatColor(const void* colorRGBA, void* colorFormat) const; }; struct Impl { amd::Coord3D region_; //!< Image size size_t rp_; //!< Image row pitch size_t sp_; //!< Image slice pitch const Format format_; //!< Image format void* reserved_; size_t bp_; Impl(const Format& format, Coord3D region, size_t rp, size_t sp = 0, size_t bp = 0) : region_(region), rp_(rp), sp_(sp), format_(format), bp_(bp) { DEBUG_ONLY(reserved_ = NULL); } }; private: Impl impl_; //!< Image object description size_t dim_; //!< Image dimension uint mipLevels_; //!< The number of mip levels uint baseMipLevel_; //!< The base mip level for a view protected: Image( const Format& format, Image& parent, uint baseMipLevel = 0); ///! Initializes the device memory array which is nested // after'Image' object in memory layout. virtual void initDeviceMemory(); //! Copies initialization data to the backing store virtual void copyToBackingStore( void* initFrom //!< Pointer to the initialization memory ); void initDimension(); public: Image( Context& context, Type type, Flags flags, const Format& format, size_t width, size_t height, size_t depth, size_t rowPitch, size_t slicePitch, uint mipLevels = 1); Image( Buffer& buffer, Type type, Flags flags, const Format& format, size_t width, size_t height, size_t depth, size_t rowPitch, size_t slicePitch); //! Validate image dimensions with supported sizes static bool validateDimensions( const std::vector& devices, //!< List of devices for validation cl_mem_object_type type, //!< Image type size_t width, //!< Image width size_t height, //!< Image height size_t depth, //!< Image depth size_t arraySize //!< Image array size ); const Format& getImageFormat() const {return impl_.format_;} //! static_cast to Buffer with sanity check virtual Image* asImage() { return this; } //! Returns true if specified area covers image entirely bool isEntirelyCovered( const Coord3D& origin, //!< Origin location of the covered region const Coord3D& region //!< Covered region dimensions ) const; //! Returns true if the specified area is not degenerate and is inside of allocated memory bool validateRegion( const Coord3D& origin, //!< Origin location of the covered region const Coord3D& region //!< Covered region dimensions ) const; //! Returns true if the slice value for the image is valid bool isRowSliceValid( size_t rowPitch, //!< The row pitch value size_t slicePitch, //!< The slice pitch value size_t width, //!< The width of the copy region size_t height //!< The height of the copy region ) const; //! Creates a view memory object virtual Image* createView( const Context& context, //!< Context for a view creation const Format& format, //!< The new format for a view device::VirtualDevice* vDev, //!< Virtual device object uint baseMipLevel = 0 //!< Base mip level for a view ); //! Returns the impl for this image. Impl& getImpl() { return impl_; } //! Returns the number of dimensions. size_t getDims() const { return dim_; } //! Base virtual methods to be overridden in derived image classes //! //! Returns width of image in pixels size_t getWidth() const { return impl_.region_[0]; } //! Returns height of image in pixels size_t getHeight() const { return impl_.region_[1]; } //! Returns image's row pitch in bytes size_t getRowPitch() const { return impl_.rp_; } //! Returns image's byte pitch size_t getBytePitch() const { return impl_.bp_; } //! Returns depth of the image in pixels/slices size_t getDepth() const { return impl_.region_[2]; } //! Returns image's slice pitch in bytes size_t getSlicePitch() const { return impl_.sp_; } //! Returns image's slice pitch in bytes uint getMipLevels() const { return mipLevels_; } //! Returns image's slice pitch in bytes uint getBaseMipLevel() const { return baseMipLevel_; } //! Get the image covered region const Coord3D& getRegion() const { return impl_.region_; } //! Sets the byte pitch obtained from HWL void setBytePitch(size_t bytePitch) { impl_.bp_ = bytePitch; } //! Creates and initializes device (cache) memory for all devices bool create( void* initFrom = NULL //!< Pointer to the initialization data ); }; //! SVM-related functionality. class SvmBuffer : AllStatic { public: //! Allocate a shared buffer that is accessible by all devices in the context static void* malloc( Context& context, cl_svm_mem_flags flags, size_t size, size_t alignment); //! Release shared buffer static void free(const Context& context, void* ptr); //! Fill the destination buffer \a dst with the contents of the source //! buffer \a src \times times. static void memFill( void* dst, const void* src, size_t srcSize, size_t times); //! Return true if \a ptr is a pointer allocated using SvmBuffer::malloc //! that has not been deallocated afterwards static bool malloced(const void* ptr); private: static void Add(uintptr_t k, uintptr_t v); static void Remove(uintptr_t k); static bool Contains(uintptr_t ptr); static std::map Allocated_; // !< Allocated buffers static Monitor AllocatedLock_; }; //! Liquid flash extension class LiquidFlashFile : public RuntimeObject { private: const wchar_t* name_; cl_file_flags_amd flags_; void* handle_; uint32_t blockSize_; uint64_t fileSize_; public: LiquidFlashFile(const wchar_t* name, cl_file_flags_amd flags) : name_(name), flags_(flags), handle_(NULL) ,blockSize_(0),fileSize_(0) { } ~LiquidFlashFile(); bool open(); void close(); uint32_t blockSize() const { return blockSize_; }; uint64_t fileSize() const { return fileSize_; }; bool readBlock( void* dst, uint64_t fileOffset, uint64_t bufferOffset, uint64_t size) const; virtual ObjectType objectType() const { return ObjectTypeLiquidFlashFile; } }; } // namespace amd #endif // MEMORY_H_