From dacb39cafaf0476658c51b97fd85b79a5d51a88b Mon Sep 17 00:00:00 2001
From: foreman
Date: Tue, 24 Apr 2018 18:35:32 -0400
Subject: [PATCH] P4 to Git Change 1545895 by gandryey@gera-w8 on 2018/04/24
18:25:56
SWDEV-79445 - OCL generic changes and code clean-up
- Replace dynamic memory object allocation for the managed buffer with a preallocated object
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/device.hpp#298 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.cpp#9 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.hpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palmemory.hpp#5 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#64 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#21 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#94 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#50 edit
[ROCm/clr commit: 2f41f4197739002bc94d2e4d510c067bcefc9400]
---
projects/clr/rocclr/runtime/device/device.hpp | 9 ++--
.../rocclr/runtime/device/pal/palconstbuf.cpp | 45 +++++++------------
.../rocclr/runtime/device/pal/palconstbuf.hpp | 10 +++--
.../rocclr/runtime/device/pal/palmemory.hpp | 6 +++
.../rocclr/runtime/device/pal/palresource.cpp | 4 ++
.../rocclr/runtime/device/pal/palresource.hpp | 22 +++++++++
.../rocclr/runtime/device/pal/palvirtual.cpp | 2 +-
.../rocclr/runtime/device/pal/palvirtual.hpp | 2 +-
8 files changed, 61 insertions(+), 39 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/device.hpp b/projects/clr/rocclr/runtime/device/device.hpp
index 6671639b65..8942a6b9f5 100644
--- a/projects/clr/rocclr/runtime/device/device.hpp
+++ b/projects/clr/rocclr/runtime/device/device.hpp
@@ -838,15 +838,14 @@ class Memory : public amd::HeapObject {
//! Decrement map count
virtual void decIndMapCount() {}
+ size_t size_; //!< Memory size
+
private:
//! Disable default copy constructor
- Memory& operator=(const Memory&);
+ Memory& operator=(const Memory&) = delete;
//! Disable operator=
- Memory(const Memory&);
-
- //! Our size
- size_t size_;
+ Memory(const Memory&) = delete;
};
class Sampler : public amd::HeapObject {
diff --git a/projects/clr/rocclr/runtime/device/pal/palconstbuf.cpp b/projects/clr/rocclr/runtime/device/pal/palconstbuf.cpp
index bffa902e0a..9121b3f6a2 100644
--- a/projects/clr/rocclr/runtime/device/pal/palconstbuf.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palconstbuf.cpp
@@ -135,39 +135,28 @@ uint64_t ConstantBuffer::UploadDataToHw(const void* sysmem, uint32_t size) const
}
// ================================================================================================
-XferBuffer::XferBuffer(ManagedBuffer& mbuf, uint32_t size)
- : mbuf_(mbuf)
- , size_(size)
-{}
+XferBuffer::XferBuffer(const Device& device, ManagedBuffer& mbuf, uint32_t size)
+ : buffer_view_(device, size)
+ , mbuf_(mbuf)
+ , size_(size) {
+ // Create a view for access
+ Resource::ViewParams params = {};
+ params.gpu_ = &mbuf_.gpu();
+ params.offset_ = 0;
+ params.size_ = size_;
+ params.resource_ = mbuf_.activeMemory();
+ bool result = buffer_view_.create(Resource::View, &params);
+ assert(result && "View creaiton should never return an error!");
+}
// ================================================================================================
-Memory& XferBuffer::Acquire(uint32_t size) const
-{
+Memory& XferBuffer::Acquire(uint32_t size) {
uint64_t vm_address;
// Reserve space in the managed buffer
address cpu_address = mbuf_.reserve(size, &vm_address);
- // Create a view for access
- Memory* mem = new Memory(mbuf_.gpu().dev(), static_cast<size_t>(size));
- Resource::ViewParams params = {};
- params.gpu_ = &mbuf_.gpu();
- params.offset_ = vm_address - mbuf_.vmAddress();
- params.size_ = size;
- params.resource_ = mbuf_.activeMemory();
- if (nullptr == mem || !mem->create(Resource::View, &params)) {
- delete mem;
- // If the suballocaiton failed for some reason, then return the top of the active buffer
- return mbuf_.reserveAtTheTop(size);
- }
- return *mem;
-}
-
-// ================================================================================================
-void XferBuffer::Release(Memory& mem) const
-{
- // Delete view
- if (mem.desc().type_ == Resource::View) {
- delete &mem;
- }
+ // Update a view for access
+ buffer_view_.updateView(mbuf_.activeMemory(), vm_address - mbuf_.vmAddress(), size);
+ return buffer_view_;
}
} // namespace pal
diff --git a/projects/clr/rocclr/runtime/device/pal/palconstbuf.hpp b/projects/clr/rocclr/runtime/device/pal/palconstbuf.hpp
index 5ab7d5d67f..da1984f636 100644
--- a/projects/clr/rocclr/runtime/device/pal/palconstbuf.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palconstbuf.hpp
@@ -116,8 +116,9 @@ private:
class XferBuffer : public amd::EmbeddedObject {
public:
//! Constructor for the ConstBuffer class
- XferBuffer(ManagedBuffer& mbuf, //!< Managed buffer
- uint32_t size //!< Maximum size of the transfer buffer
+ XferBuffer(const Device& device, //!< Active GPU device
+ ManagedBuffer& mbuf, //!< Managed buffer
+ uint32_t size //!< Maximum size of the transfer buffer
);
//! Destructor for the ConstBuffer class
@@ -128,11 +129,11 @@ public:
* \return GPU memory object associated with free memory
*/
Memory& Acquire(uint32_t size //!< data size for transfers
- ) const;
+ );
//! Releases memory object used in the staging transfer
void Release(Memory& mem //!< Memory object for release
- ) const;
+ ) { buffer_view_.updateView(nullptr, 0, 0); }
size_t MaxSize() const { return static_cast<size_t>(size_); }
@@ -143,6 +144,7 @@ private:
//! Disable operator=
XferBuffer& operator=(const XferBuffer&) = delete;
+ Memory buffer_view_; //!< Buffer view returned in the acquire
ManagedBuffer& mbuf_; //!< Managed buffer on GPU
uint32_t size_; //!< Mx staging buffer size
};
diff --git a/projects/clr/rocclr/runtime/device/pal/palmemory.hpp b/projects/clr/rocclr/runtime/device/pal/palmemory.hpp
index bb22f907df..6825c8b8c5 100644
--- a/projects/clr/rocclr/runtime/device/pal/palmemory.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palmemory.hpp
@@ -142,6 +142,12 @@ class Memory : public device::Memory, public Resource {
(CL_MEM_ALLOC_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY)));
}
+ //! Quick view update for managed buffers. It should avoid expensive object allocations
+ void updateView(Resource* view, size_t offset, size_t size) {
+ size_ = size;
+ Resource::updateView(view, offset, size);
+ }
+
protected:
//! Decrement map count
void decIndMapCount();
diff --git a/projects/clr/rocclr/runtime/device/pal/palresource.cpp b/projects/clr/rocclr/runtime/device/pal/palresource.cpp
index 7b69803c4e..178e79055e 100644
--- a/projects/clr/rocclr/runtime/device/pal/palresource.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palresource.cpp
@@ -321,6 +321,8 @@ Resource::~Resource() {
image_->Destroy();
delete[] reinterpret_cast<char*>(image_);
}
+
+ // Remove the current resource from the global resource list
gpuDevice_.removeResource(this);
}
@@ -1225,6 +1227,8 @@ void Resource::free()
if (!desc().buffer_) {
dev().srds().freeSrdSlot(hwSrd_);
}
+
+ memRef_ = nullptr;
}
// ================================================================================================
diff --git a/projects/clr/rocclr/runtime/device/pal/palresource.hpp b/projects/clr/rocclr/runtime/device/pal/palresource.hpp
index 47d1a6eb13..a838f2673a 100644
--- a/projects/clr/rocclr/runtime/device/pal/palresource.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palresource.hpp
@@ -359,6 +359,28 @@ class Resource : public amd::HeapObject {
//! Erase an entry in the array for provided queue index
void eraseGpuEvents(uint index) { events_.erase(events_.begin() + index); }
+ //! Quick view update for managed buffers. It should avoid expensive object allocations
+ //! If the base resource is null, then the view is released
+ void updateView(Resource* base, size_t offset, size_t size) {
+ if (base == nullptr) {
+ desc_.type_ = Empty;
+ memRef_->release();
+ memRef_ = nullptr;
+ viewOwner_ = nullptr;
+ } else {
+ desc_.type_ = View;
+ viewOwner_ = base;
+ offset_ = offset + viewOwner_->offset();
+ assert(viewOwner_->data() != nullptr && "CPU access must be provide for this call!");
+ address_ = viewOwner_->data() + offset;
+ desc_.cardMemory_ = viewOwner_->desc().cardMemory_;
+ memRef_ = viewOwner_->memRef_;
+ memRef_->retain();
+ desc_.width_ = amd::alignUp(size, Pal::Formats::BytesPerPixel(Pal::ChNumFormat::X32_Uint)) /
+ Pal::Formats::BytesPerPixel(Pal::ChNumFormat::X32_Uint);
+ }
+ }
+
protected:
/*! \brief Creates a PAL iamge object, associated with the resource
*
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
index 206d4f5f15..df7a509a3f 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
@@ -688,7 +688,7 @@ VirtualGPU::VirtualGPU(Device& device)
tsCache_(nullptr),
dmaFlushMgmt_(device),
managedBuffer_(*this, device.settings().stagedXferSize_ + 32 * Ki),
- writeBuffer_(managedBuffer_, device.settings().stagedXferSize_),
+ writeBuffer_(device, managedBuffer_, device.settings().stagedXferSize_),
hwRing_(0),
readjustTimeGPU_(0),
lastTS_(nullptr),
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp
index 2d73accdf0..88442ed901 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.hpp
@@ -376,7 +376,7 @@ class VirtualGPU : public device::VirtualDevice {
);
//! Return xfer buffer for staging operations
- const XferBuffer& xferWrite() const { return writeBuffer_; }
+ XferBuffer& xferWrite() { return writeBuffer_; }
//! Adds a pinned memory object into a map
void addPinnedMem(amd::Memory* mem);