From dacb39cafaf0476658c51b97fd85b79a5d51a88b Mon Sep 17 00:00:00 2001 From: foreman Date: Tue, 24 Apr 2018 18:35:32 -0400 Subject: [PATCH] P4 to Git Change 1545895 by gandryey@gera-w8 on 2018/04/24 18:25:56 SWDEV-79445 - OCL generic changes and code clean-up - Replace dynamic memory object allocation for the managed buffer with a preallocated object Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#298 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.cpp#9 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.hpp#8 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#5 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#64 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#21 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#94 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#50 edit [ROCm/clr commit: 2f41f4197739002bc94d2e4d510c067bcefc9400] --- projects/clr/rocclr/runtime/device/device.hpp | 9 ++-- .../rocclr/runtime/device/pal/palconstbuf.cpp | 45 +++++++------------ .../rocclr/runtime/device/pal/palconstbuf.hpp | 10 +++-- .../rocclr/runtime/device/pal/palmemory.hpp | 6 +++ .../rocclr/runtime/device/pal/palresource.cpp | 4 ++ .../rocclr/runtime/device/pal/palresource.hpp | 22 +++++++++ .../rocclr/runtime/device/pal/palvirtual.cpp | 2 +- .../rocclr/runtime/device/pal/palvirtual.hpp | 2 +- 8 files changed, 61 insertions(+), 39 deletions(-) diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp index 6671639b65..8942a6b9f5 100644 --- a/projects/clr/rocclr/runtime/device/device.hpp +++ b/projects/clr/rocclr/runtime/device/device.hpp @@ -838,15 +838,14 @@ class Memory : public amd::HeapObject { //! Decrement map count virtual void decIndMapCount() {} + size_t size_; //!< Memory size + private: //! 
Disable default copy constructor - Memory& operator=(const Memory&); + Memory& operator=(const Memory&) = delete; //! Disable operator= - Memory(const Memory&); - - //! Our size - size_t size_; + Memory(const Memory&) = delete; }; class Sampler : public amd::HeapObject { diff --git a/projects/clr/rocclr/runtime/device/pal/palconstbuf.cpp b/projects/clr/rocclr/runtime/device/pal/palconstbuf.cpp index bffa902e0a..9121b3f6a2 100644 --- a/projects/clr/rocclr/runtime/device/pal/palconstbuf.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palconstbuf.cpp @@ -135,39 +135,28 @@ uint64_t ConstantBuffer::UploadDataToHw(const void* sysmem, uint32_t size) const } // ================================================================================================ -XferBuffer::XferBuffer(ManagedBuffer& mbuf, uint32_t size) - : mbuf_(mbuf) - , size_(size) -{} +XferBuffer::XferBuffer(const Device& device, ManagedBuffer& mbuf, uint32_t size) + : buffer_view_(device, size) + , mbuf_(mbuf) + , size_(size) { + // Create a view for access + Resource::ViewParams params = {}; + params.gpu_ = &mbuf_.gpu(); + params.offset_ = 0; + params.size_ = size_; + params.resource_ = mbuf_.activeMemory(); + bool result = buffer_view_.create(Resource::View, &params); + assert(result && "View creaiton should never return an error!"); +} // ================================================================================================ -Memory& XferBuffer::Acquire(uint32_t size) const -{ +Memory& XferBuffer::Acquire(uint32_t size) { uint64_t vm_address; // Reserve space in the managed buffer address cpu_address = mbuf_.reserve(size, &vm_address); - // Create a view for access - Memory* mem = new Memory(mbuf_.gpu().dev(), static_cast<size_t>(size)); - Resource::ViewParams params = {}; - params.gpu_ = &mbuf_.gpu(); - params.offset_ = vm_address - mbuf_.vmAddress(); - params.size_ = size; - params.resource_ = mbuf_.activeMemory(); - if (nullptr == mem || !mem->create(Resource::View, &params)) { - delete mem; - // If the 
suballocaiton failed for some reason, then return the top of the active buffer - return mbuf_.reserveAtTheTop(size); - } - return *mem; -} - -// ================================================================================================ -void XferBuffer::Release(Memory& mem) const -{ - // Delete view - if (mem.desc().type_ == Resource::View) { - delete &mem; - } + // Update a view for access + buffer_view_.updateView(mbuf_.activeMemory(), vm_address - mbuf_.vmAddress(), size); + return buffer_view_; } } // namespace pal diff --git a/projects/clr/rocclr/runtime/device/pal/palconstbuf.hpp b/projects/clr/rocclr/runtime/device/pal/palconstbuf.hpp index 5ab7d5d67f..da1984f636 100644 --- a/projects/clr/rocclr/runtime/device/pal/palconstbuf.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palconstbuf.hpp @@ -116,8 +116,9 @@ private: class XferBuffer : public amd::EmbeddedObject { public: //! Constructor for the ConstBuffer class - XferBuffer(ManagedBuffer& mbuf, //!< Managed buffer - uint32_t size //!< Maximum size of the transfer buffer + XferBuffer(const Device& device, //!< Active GPU device + ManagedBuffer& mbuf, //!< Managed buffer + uint32_t size //!< Maximum size of the transfer buffer ); //! Destructor for the ConstBuffer class @@ -128,11 +129,11 @@ public: * \return GPU memory object associated with free memory */ Memory& Acquire(uint32_t size //!< data size for transfers - ) const; + ); //! Releases memory object used in the staging transfer void Release(Memory& mem //!< Memory object for release - ) const; + ) { buffer_view_.updateView(nullptr, 0, 0); } size_t MaxSize() const { return static_cast<size_t>(size_); } @@ -143,6 +144,7 @@ private: //! 
Disable operator= XferBuffer& operator=(const XferBuffer&) = delete; + Memory buffer_view_; //!< Buffer view returned in the acquire ManagedBuffer& mbuf_; //!< Managed buffer on GPU uint32_t size_; //!< Mx staging buffer size }; diff --git a/projects/clr/rocclr/runtime/device/pal/palmemory.hpp b/projects/clr/rocclr/runtime/device/pal/palmemory.hpp index bb22f907df..6825c8b8c5 100644 --- a/projects/clr/rocclr/runtime/device/pal/palmemory.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palmemory.hpp @@ -142,6 +142,12 @@ class Memory : public device::Memory, public Resource { (CL_MEM_ALLOC_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY))); } + //! Quick view update for managed buffers. It should avoid expensive object allocations + void updateView(Resource* view, size_t offset, size_t size) { + size_ = size; + Resource::updateView(view, offset, size); + } + protected: //! Decrement map count void decIndMapCount(); diff --git a/projects/clr/rocclr/runtime/device/pal/palresource.cpp b/projects/clr/rocclr/runtime/device/pal/palresource.cpp index 7b69803c4e..178e79055e 100644 --- a/projects/clr/rocclr/runtime/device/pal/palresource.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palresource.cpp @@ -321,6 +321,8 @@ Resource::~Resource() { image_->Destroy(); delete[] reinterpret_cast(image_); } + + // Remove the current resource from the global resource list gpuDevice_.removeResource(this); } @@ -1225,6 +1227,8 @@ void Resource::free() if (!desc().buffer_) { dev().srds().freeSrdSlot(hwSrd_); } + + memRef_ = nullptr; } // ================================================================================================ diff --git a/projects/clr/rocclr/runtime/device/pal/palresource.hpp b/projects/clr/rocclr/runtime/device/pal/palresource.hpp index 47d1a6eb13..a838f2673a 100644 --- a/projects/clr/rocclr/runtime/device/pal/palresource.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palresource.hpp @@ -359,6 +359,28 @@ class Resource : public amd::HeapObject { //! 
Erase an entry in the array for provided queue index void eraseGpuEvents(uint index) { events_.erase(events_.begin() + index); } + //! Quick view update for managed buffers. It should avoid expensive object allocations + //! If the base resource is null, then the view is released + void updateView(Resource* base, size_t offset, size_t size) { + if (base == nullptr) { + desc_.type_ = Empty; + memRef_->release(); + memRef_ = nullptr; + viewOwner_ = nullptr; + } else { + desc_.type_ = View; + viewOwner_ = base; + offset_ = offset + viewOwner_->offset(); + assert(viewOwner_->data() != nullptr && "CPU access must be provide for this call!"); + address_ = viewOwner_->data() + offset; + desc_.cardMemory_ = viewOwner_->desc().cardMemory_; + memRef_ = viewOwner_->memRef_; + memRef_->retain(); + desc_.width_ = amd::alignUp(size, Pal::Formats::BytesPerPixel(Pal::ChNumFormat::X32_Uint)) / + Pal::Formats::BytesPerPixel(Pal::ChNumFormat::X32_Uint); + } + } + protected: /*! \brief Creates a PAL iamge object, associated with the resource * diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp index 206d4f5f15..df7a509a3f 100644 --- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp @@ -688,7 +688,7 @@ VirtualGPU::VirtualGPU(Device& device) tsCache_(nullptr), dmaFlushMgmt_(device), managedBuffer_(*this, device.settings().stagedXferSize_ + 32 * Ki), - writeBuffer_(managedBuffer_, device.settings().stagedXferSize_), + writeBuffer_(device, managedBuffer_, device.settings().stagedXferSize_), hwRing_(0), readjustTimeGPU_(0), lastTS_(nullptr), diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp index 2d73accdf0..88442ed901 100644 --- a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp +++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp @@ -376,7 +376,7 @@ class VirtualGPU : 
public device::VirtualDevice { ); //! Return xfer buffer for staging operations - const XferBuffer& xferWrite() const { return writeBuffer_; } + XferBuffer& xferWrite() { return writeBuffer_; } //! Adds a pinned memory object into a map void addPinnedMem(amd::Memory* mem);