P4 to Git Change 1544622 by gandryey@gera-w8 on 2018/04/20 17:02:52
SWDEV-79445 - OCL generic changes and code clean-up
- Add managed buffer support and replace all uploads with the managed buffer allocations
- Add staging copy for small image writes
- Replace constant buffer in FillBuffer with a managed buffer also
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#84 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#62 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#63 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#92 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#48 edit
[ROCm/clr commit: 392724cc3f]
Этот коммит содержится в:
@@ -256,34 +256,36 @@ bool DmaBlitManager::readImage(device::Memory& srcMemory, void* dstHost, const a
|
||||
bool DmaBlitManager::writeMemoryStaged(const void* srcHost, Memory& dstMemory, Memory& xferBuf,
|
||||
size_t origin, size_t& offset, size_t& totalSize,
|
||||
size_t xferSize) const {
|
||||
amd::Coord3D src(0, 0, 0);
|
||||
size_t chunkSize;
|
||||
static const bool CopyRect = false;
|
||||
// Flush DMA for ASYNC copy
|
||||
// @todo Blocking write requires a flush to start earlier,
|
||||
// but currently VDI doesn't provide that info
|
||||
static const bool FlushDMA = false;
|
||||
bool flushDMA = false;
|
||||
|
||||
if (dev().xferRead().bufSize() < 128 * Ki) {
|
||||
chunkSize = dev().xferWrite().bufSize();
|
||||
if (gpu().xferWrite().MaxSize() < 128 * Ki) {
|
||||
chunkSize = gpu().xferWrite().MaxSize();
|
||||
} else {
|
||||
chunkSize = std::min(amd::alignUp(xferSize / 4, 256), dev().xferWrite().bufSize());
|
||||
chunkSize = std::min(amd::alignUp(xferSize / 4, 256), gpu().xferWrite().MaxSize());
|
||||
chunkSize = std::max(chunkSize, 128 * Ki);
|
||||
bool flushDMA = true;
|
||||
}
|
||||
|
||||
while (xferSize != 0) {
|
||||
// Find the partial transfer size
|
||||
size_t tmpSize = std::min(chunkSize, xferSize);
|
||||
amd::Coord3D src(offset, 0, 0);
|
||||
amd::Coord3D dst(origin + offset, 0, 0);
|
||||
amd::Coord3D copySize(tmpSize, 0, 0);
|
||||
|
||||
// Copy data into the temporary buffer, using CPU
|
||||
if (!xferBuf.hostWrite(&gpu(), reinterpret_cast<const char*>(srcHost) + offset, src, copySize)) {
|
||||
if (!xferBuf.hostWrite(&gpu(), reinterpret_cast<const char*>(srcHost) + offset,
|
||||
src, copySize, Resource::NoWait)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Copy data into the original destination memory
|
||||
if (!xferBuf.partialMemCopyTo(gpu(), src, dst, copySize, dstMemory, CopyRect, FlushDMA)) {
|
||||
if (!xferBuf.partialMemCopyTo(gpu(), src, dst, copySize, dstMemory, CopyRect, flushDMA)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -365,7 +367,7 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
|
||||
}
|
||||
|
||||
if (dstSize != 0) {
|
||||
Memory& xferBuf = dev().xferWrite().acquire();
|
||||
Memory& xferBuf = gpu().xferWrite().Acquire(dstSize);
|
||||
|
||||
// Write memory using a staged resource
|
||||
if (!writeMemoryStaged(srcHost, gpuMem(dstMemory), xferBuf, origin[0], offset, dstSize,
|
||||
@@ -374,7 +376,7 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
|
||||
return false;
|
||||
}
|
||||
|
||||
gpu().addXferWrite(xferBuf);
|
||||
gpu().xferWrite().Release(xferBuf);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -392,7 +394,7 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
|
||||
gpuMem(dstMemory).isPersistentDirectMap()) {
|
||||
return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
|
||||
} else {
|
||||
Memory& xferBuf = dev().xferWrite().acquire();
|
||||
Memory& xferBuf = gpu().xferWrite().Acquire(std::min(gpu().xferWrite().MaxSize(), size[0]));
|
||||
|
||||
amd::Coord3D src(0, 0, 0);
|
||||
size_t tmpSize = 0;
|
||||
@@ -408,7 +410,7 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
|
||||
|
||||
while (dstSize != 0) {
|
||||
// Find the partial transfer size
|
||||
tmpSize = std::min(dev().xferWrite().bufSize(), dstSize);
|
||||
tmpSize = std::min(gpu().xferWrite().MaxSize(), dstSize);
|
||||
|
||||
amd::Coord3D dst(bufOffset, 0, 0);
|
||||
amd::Coord3D copySize(tmpSize, 0, 0);
|
||||
@@ -432,7 +434,7 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
|
||||
}
|
||||
}
|
||||
}
|
||||
gpu().addXferWrite(xferBuf);
|
||||
gpu().xferWrite().Release(xferBuf);
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -576,8 +578,8 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory
|
||||
entire, rowPitch, slicePitch);
|
||||
} else {
|
||||
// Use PAL path for a transfer
|
||||
result =
|
||||
gpuMem(srcMemory).partialMemCopyTo(gpu(), srcOrigin, dstOrigin, size, gpuMem(dstMemory));
|
||||
result = gpuMem(srcMemory).partialMemCopyTo(gpu(), srcOrigin, dstOrigin,
|
||||
size, gpuMem(dstMemory));
|
||||
|
||||
// Check if a HostBlit transfer is required
|
||||
if (completeOperation_ && !result) {
|
||||
@@ -607,9 +609,8 @@ bool DmaBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMem
|
||||
KernelBlitManager::KernelBlitManager(VirtualGPU& gpu, Setup setup)
|
||||
: DmaBlitManager(gpu, setup),
|
||||
program_(NULL),
|
||||
constantBuffer_(NULL),
|
||||
xferBufferSize_(0),
|
||||
lockXferOps_(NULL) {
|
||||
lockXferOps_("Transfer Ops Lock", true) {
|
||||
for (uint i = 0; i < BlitTotal; ++i) {
|
||||
kernels_[i] = NULL;
|
||||
}
|
||||
@@ -636,17 +637,11 @@ KernelBlitManager::~KernelBlitManager() {
|
||||
context_->release();
|
||||
}
|
||||
|
||||
if (NULL != constantBuffer_) {
|
||||
constantBuffer_->release();
|
||||
}
|
||||
|
||||
for (uint i = 0; i < MaxXferBuffers; ++i) {
|
||||
if (NULL != xferBuffers_[i]) {
|
||||
xferBuffers_[i]->release();
|
||||
}
|
||||
}
|
||||
|
||||
delete lockXferOps_;
|
||||
}
|
||||
|
||||
bool KernelBlitManager::create(amd::Device& device) {
|
||||
@@ -693,19 +688,6 @@ bool KernelBlitManager::createProgram(Device& device) {
|
||||
result = true;
|
||||
} while (!result);
|
||||
|
||||
// Create an internal constant buffer
|
||||
constantBuffer_ = new (*context_) amd::Buffer(*context_, CL_MEM_ALLOC_HOST_PTR, 4 * Ki);
|
||||
|
||||
if ((constantBuffer_ != NULL) && !constantBuffer_->create(NULL)) {
|
||||
constantBuffer_->release();
|
||||
constantBuffer_ = NULL;
|
||||
return false;
|
||||
} else if (constantBuffer_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Assign the constant buffer to the current virtual GPU
|
||||
constantBuffer_->setVirtualDevice(&gpu());
|
||||
|
||||
if (dev().settings().xferBufSize_ > 0) {
|
||||
xferBufferSize_ = dev().settings().xferBufSize_;
|
||||
@@ -734,11 +716,6 @@ bool KernelBlitManager::createProgram(Device& device) {
|
||||
}
|
||||
}
|
||||
|
||||
lockXferOps_ = new amd::Monitor("Transfer Ops Lock", true);
|
||||
if (NULL == lockXferOps_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -1685,30 +1662,43 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
|
||||
} else {
|
||||
size_t pinSize;
|
||||
FindPinSize(pinSize, size, rowPitch, slicePitch, gpuMem(dstMemory));
|
||||
size_t partial = 0;
|
||||
bool pinned;
|
||||
|
||||
size_t partial;
|
||||
amd::Memory* amdMemory = pinHostMemory(srcHost, pinSize, partial);
|
||||
|
||||
if (amdMemory == NULL) {
|
||||
// Force SW copy
|
||||
result = HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
|
||||
entire);
|
||||
synchronize();
|
||||
return result;
|
||||
amd::Memory* amdMemory = nullptr;
|
||||
Memory* srcMemory;
|
||||
if (pinSize > gpu().xferWrite().MaxSize()) {
|
||||
amdMemory = pinHostMemory(srcHost, pinSize, partial);
|
||||
if (amdMemory == nullptr) {
|
||||
// Force SW copy
|
||||
result = HostBlitManager::writeImage(srcHost, dstMemory,
|
||||
origin, size, rowPitch, slicePitch, entire);
|
||||
synchronize();
|
||||
return result;
|
||||
}
|
||||
// Get device memory for this virtual device
|
||||
srcMemory = dev().getGpuMemory(amdMemory);
|
||||
pinned = true;
|
||||
}
|
||||
else {
|
||||
srcMemory = &gpu().xferWrite().Acquire(pinSize);
|
||||
srcMemory->hostWrite(&gpu(), srcHost, 0, pinSize, Resource::NoWait);
|
||||
pinned = false;
|
||||
}
|
||||
|
||||
// Readjust destination offset
|
||||
const amd::Coord3D srcOrigin(partial);
|
||||
|
||||
// Get device memory for this virtual device
|
||||
Memory* srcMemory = dev().getGpuMemory(amdMemory);
|
||||
|
||||
// Copy image to buffer
|
||||
result = copyBufferToImage(*srcMemory, dstMemory, srcOrigin, origin, size, entire, rowPitch,
|
||||
slicePitch);
|
||||
|
||||
// Add pinned memory for a later release
|
||||
gpu().addPinnedMem(amdMemory);
|
||||
if (pinned) {
|
||||
// Add pinned memory for a later release
|
||||
gpu().addPinnedMem(amdMemory);
|
||||
} else {
|
||||
gpu().xferWrite().Release(*srcMemory);
|
||||
}
|
||||
}
|
||||
|
||||
synchronize();
|
||||
@@ -2054,14 +2044,12 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern,
|
||||
setArgument(kernels_[fillType], 0, sizeof(cl_mem), &mem);
|
||||
setArgument(kernels_[fillType], 1, sizeof(cl_mem), NULL);
|
||||
}
|
||||
Memory* gpuCB = dev().getGpuMemory(constantBuffer_);
|
||||
if (gpuCB == NULL) {
|
||||
return false;
|
||||
}
|
||||
void* constBuf = gpuCB->map(&gpu(), Resource::WriteOnly);
|
||||
Memory& gpuCB = gpu().xferWrite().Acquire(patternSize);
|
||||
void* constBuf = gpuCB.map(&gpu(), Resource::NoWait);
|
||||
memcpy(constBuf, pattern, patternSize);
|
||||
gpuCB->unmap(&gpu());
|
||||
setArgument(kernels_[fillType], 2, sizeof(cl_mem), &gpuCB);
|
||||
gpuCB.unmap(&gpu());
|
||||
Memory* pGpuCB = &gpuCB;
|
||||
setArgument(kernels_[fillType], 2, sizeof(cl_mem), &pGpuCB);
|
||||
cl_ulong offset = origin[0];
|
||||
if (dwordAligned) {
|
||||
patternSize /= sizeof(uint32_t);
|
||||
@@ -2077,6 +2065,7 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern,
|
||||
// Execute the blit
|
||||
address parameters = kernels_[fillType]->parameters().values();
|
||||
result = gpu().submitKernelInternal(ndrange, *kernels_[fillType], parameters);
|
||||
gpu().xferWrite().Release(gpuCB);
|
||||
}
|
||||
|
||||
synchronize();
|
||||
@@ -2137,12 +2126,10 @@ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& ds
|
||||
setArgument(kernels_[blitType], 1, sizeof(cl_mem), &mem);
|
||||
// Program source origin
|
||||
cl_ulong srcOffset = srcOrigin[0] / CopyBuffAlignment[i];
|
||||
;
|
||||
setArgument(kernels_[blitType], 2, sizeof(srcOffset), &srcOffset);
|
||||
|
||||
// Program destination origin
|
||||
cl_ulong dstOffset = dstOrigin[0] / CopyBuffAlignment[i];
|
||||
;
|
||||
setArgument(kernels_[blitType], 3, sizeof(dstOffset), &dstOffset);
|
||||
|
||||
cl_ulong copySize = size[0];
|
||||
|
||||
@@ -352,7 +352,7 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
const void* data //!< Raw data pointer
|
||||
) const;
|
||||
|
||||
virtual amd::Monitor* lockXfer() const { return lockXferOps_; }
|
||||
virtual amd::Monitor* lockXfer() const { return &lockXferOps_; }
|
||||
|
||||
private:
|
||||
static const size_t MaxXferBuffers = 2;
|
||||
@@ -397,10 +397,9 @@ class KernelBlitManager : public DmaBlitManager {
|
||||
|
||||
amd::Program* program_; //!< GPU program obejct
|
||||
amd::Kernel* kernels_[BlitTotal]; //!< GPU kernels for blit
|
||||
amd::Memory* constantBuffer_; //!< An internal CB for blits
|
||||
amd::Memory* xferBuffers_[MaxXferBuffers]; //!< Transfer buffers for images
|
||||
size_t xferBufferSize_; //!< Transfer buffer size
|
||||
amd::Monitor* lockXferOps_; //!< Lock transfer operation
|
||||
mutable amd::Monitor lockXferOps_; //!< Lock transfer operation
|
||||
};
|
||||
|
||||
static const char* BlitName[KernelBlitManager::BlitTotal] = {
|
||||
|
||||
@@ -19,7 +19,7 @@ ManagedBuffer::ManagedBuffer(VirtualGPU& gpu, uint32_t size)
|
||||
, wrtAddress_(nullptr) {}
|
||||
|
||||
// ================================================================================================
|
||||
ManagedBuffer::~ManagedBuffer() {
|
||||
void ManagedBuffer::release() {
|
||||
for (auto it : buffers_) {
|
||||
if (it->data() != nullptr) {
|
||||
it->unmap(&gpu_);
|
||||
@@ -72,13 +72,26 @@ address ManagedBuffer::reserve(uint32_t size, uint64_t* gpu_address) {
|
||||
|
||||
*gpu_address = buffers_[activeBuffer_]->vmAddress() + wrtOffset_;
|
||||
address cpu_address = wrtAddress_ + wrtOffset_;
|
||||
|
||||
|
||||
// Adjust the offset by the reserved size
|
||||
wrtOffset_ += count;
|
||||
|
||||
return cpu_address;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
//! Switches to the next managed buffer in the list and returns that buffer as a whole.
//! The write address is reset to the start of the selected buffer and the write offset to 0.
//! The \p size argument is not consulted by this function.
Memory& ManagedBuffer::reserveAtTheTop(uint32_t size) {
  // Advance to the next slot, wrapping around at the end of the list
  activeBuffer_ = (activeBuffer_ + 1) % MaxNumberOfBuffers;

  Memory& next = *buffers_[activeBuffer_];

  // Wait on the selected buffer before it gets reused
  next.wait(gpu_);

  // Restart writes from the beginning of the newly selected buffer
  wrtAddress_ = next.data();
  wrtOffset_ = 0;

  return next;
}
|
||||
|
||||
// ================================================================================================
|
||||
ConstantBuffer::ConstantBuffer(ManagedBuffer& mbuf, uint32_t size)
|
||||
: mbuf_(mbuf)
|
||||
@@ -114,11 +127,47 @@ uint64_t ConstantBuffer::UploadDataToHw(uint32_t size) const {
|
||||
|
||||
// ================================================================================================
|
||||
uint64_t ConstantBuffer::UploadDataToHw(const void* sysmem, uint32_t size) const {
|
||||
uint64_t vm_address;
|
||||
address cpu_address = mbuf_.reserve(size, &vm_address);
|
||||
// Update memory with new CB data
|
||||
memcpy(cpu_address, sysmem, size);
|
||||
return vm_address;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
//! Binds the transfer buffer to its backing managed buffer and stores the size limit
//! that MaxSize() reports.
XferBuffer::XferBuffer(ManagedBuffer& mbuf, uint32_t size) : mbuf_(mbuf), size_(size) {}
|
||||
|
||||
// ================================================================================================
|
||||
//! Reserves \p size bytes in the managed buffer and wraps the reserved range in a view resource.
//!
//! \return The newly created view on success. If the view can't be created, the memory object
//!         returned by ManagedBuffer::reserveAtTheTop() is handed back instead.
Memory& XferBuffer::Acquire(uint32_t size) const {
  uint64_t vm_address;
  // Reserve space in the managed buffer. Only the GPU address is needed to place the view,
  // so the CPU pointer returned by reserve() is intentionally ignored.
  mbuf_.reserve(size, &vm_address);

  // Create a view for access
  Memory* mem = new Memory(mbuf_.gpu().dev(), static_cast<size_t>(size));
  Resource::ViewParams params = {};
  params.gpu_ = &mbuf_.gpu();
  // The view offset is relative to the start of the active managed buffer
  params.offset_ = vm_address - mbuf_.vmAddress();
  params.size_ = size;
  params.resource_ = mbuf_.activeMemory();
  if (nullptr == mem || !mem->create(Resource::View, &params)) {
    delete mem;
    // If the suballocation failed for some reason, then return the top of the active buffer
    return mbuf_.reserveAtTheTop(size);
  }
  return *mem;
}
|
||||
|
||||
// ================================================================================================
|
||||
//! Releases a staging memory object. Only resources of the View type are deleted;
//! any other resource type is left untouched.
void XferBuffer::Release(Memory& mem) const {
  const bool isView = (mem.desc().type_ == Resource::View);
  if (!isView) {
    return;
  }
  // Delete view
  delete &mem;
}
|
||||
|
||||
} // namespace pal
|
||||
|
||||
@@ -9,19 +9,20 @@
|
||||
namespace pal {
|
||||
|
||||
//! Managed buffer (staging or constant)
|
||||
class ManagedBuffer : public amd::HeapObject {
|
||||
class ManagedBuffer : public amd::EmbeddedObject {
|
||||
public:
|
||||
//! Constructor for the ConstBuffer class
|
||||
ManagedBuffer(VirtualGPU& gpu, //!< Virtual GPU device object
|
||||
uint32_t size //!< size of the managed buffers in bytes
|
||||
);
|
||||
~ManagedBuffer() {}
|
||||
|
||||
//! Destructor for the ConstBuffer class
|
||||
~ManagedBuffer();
|
||||
|
||||
//! Creates the real HW constant buffer
|
||||
//! Creates the managed buffers
|
||||
bool create(Resource::MemoryType type);
|
||||
|
||||
//! Release the managed buffers
|
||||
void release();
|
||||
|
||||
/*! \brief Uploads current constant buffer data from sysMemCopy_ to HW
|
||||
*
|
||||
* \return True if the data upload was successful
|
||||
@@ -29,6 +30,9 @@ class ManagedBuffer : public amd::HeapObject {
|
||||
address reserve(uint32_t size, //!< real data size for upload
|
||||
uint64_t* gpu_address);
|
||||
|
||||
//! Reserves memory at the top of the active buffer
|
||||
Memory& reserveAtTheTop(uint32_t size);
|
||||
|
||||
//! Returns CB size
|
||||
uint32_t size() const { return size_; }
|
||||
|
||||
@@ -40,6 +44,9 @@ class ManagedBuffer : public amd::HeapObject {
|
||||
|
||||
uint64_t vmAddress() const { return buffers_[activeBuffer_]->vmAddress(); }
|
||||
|
||||
//! Returns VirtualGPU object this managed resource associated
|
||||
VirtualGPU& gpu() const { return gpu_; }
|
||||
|
||||
private:
|
||||
//! The maximum number of the managed buffers
|
||||
static constexpr uint32_t MaxNumberOfBuffers = 3;
|
||||
@@ -63,13 +70,13 @@ class ConstantBuffer : public amd::HeapObject {
|
||||
public:
|
||||
//! Constructor for the ConstBuffer class
|
||||
ConstantBuffer(ManagedBuffer& mbuf, //!< Managed buffer
|
||||
uint32_t size
|
||||
uint32_t size //!< Max size of the constant buffer
|
||||
);
|
||||
|
||||
//! Destructor for the ConstBuffer class
|
||||
~ConstantBuffer();
|
||||
|
||||
//! Creates the real HW constant buffer
|
||||
//! Creates the HW constant buffer
|
||||
bool Create();
|
||||
|
||||
/*! \brief Uploads current constant buffer data from sysMemCopy_ to HW
|
||||
@@ -88,7 +95,7 @@ public:
|
||||
) const;
|
||||
|
||||
//! Returns a pointer to the system memory copy for CB
|
||||
address SysMemCopy(uint32_t size = 0) const { return sys_mem_copy_; }
|
||||
address SysMemCopy() const { return sys_mem_copy_; }
|
||||
|
||||
//! Returns active GPU buffer
|
||||
Memory* ActiveMemory() const { return mbuf_.activeMemory(); }
|
||||
@@ -105,4 +112,38 @@ private:
|
||||
uint32_t size_; //!< Constant buffer size
|
||||
};
|
||||
|
||||
//! Staging buffer
|
||||
class XferBuffer : public amd::EmbeddedObject {
|
||||
public:
|
||||
//! Constructor for the ConstBuffer class
|
||||
XferBuffer(ManagedBuffer& mbuf, //!< Managed buffer
|
||||
uint32_t size //!< Maximum size of the transfer buffer
|
||||
);
|
||||
|
||||
//! Destructor for the ConstBuffer class
|
||||
~XferBuffer() {}
|
||||
|
||||
/*! \brief Acquires free memory from the managed buffer
|
||||
*
|
||||
* \return GPU memory object associated with free memory
|
||||
*/
|
||||
Memory& Acquire(uint32_t size //!< data size for transfers
|
||||
) const;
|
||||
|
||||
//! Releases memory object used in the staging transfer
|
||||
void Release(Memory& mem //!< Memory object for release
|
||||
) const;
|
||||
|
||||
size_t MaxSize() const { return static_cast<size_t>(size_); }
|
||||
|
||||
private:
|
||||
//! Disable copy constructor
|
||||
XferBuffer(const XferBuffer&) = delete;
|
||||
|
||||
//! Disable operator=
|
||||
XferBuffer& operator=(const XferBuffer&) = delete;
|
||||
|
||||
ManagedBuffer& mbuf_; //!< Managed buffer on GPU
|
||||
uint32_t size_; //!< Mx staging buffer size
|
||||
};
|
||||
/*@}*/} // namespace pal
|
||||
|
||||
@@ -681,7 +681,6 @@ Device::Device()
|
||||
scratchAlloc_(nullptr),
|
||||
mapCacheOps_(nullptr),
|
||||
xferRead_(nullptr),
|
||||
xferWrite_(nullptr),
|
||||
mapCache_(nullptr),
|
||||
resourceCache_(nullptr),
|
||||
numComputeEngines_(0),
|
||||
@@ -732,7 +731,6 @@ Device::~Device() {
|
||||
|
||||
// Destroy temporary buffers for read/write
|
||||
delete xferRead_;
|
||||
delete xferWrite_;
|
||||
|
||||
// Destroy resource cache
|
||||
delete resourceCache_;
|
||||
@@ -986,21 +984,6 @@ bool Device::initializeHeapResources() {
|
||||
}
|
||||
|
||||
if (settings().stagedXferSize_ != 0) {
|
||||
// Initialize staged write buffers
|
||||
if (settings().stagedXferWrite_) {
|
||||
Resource::MemoryType type;
|
||||
if (settings().stagingWritePersistent_ && !settings().disablePersistent_) {
|
||||
type = Resource::Persistent;
|
||||
} else {
|
||||
type = Resource::RemoteUSWC;
|
||||
}
|
||||
xferWrite_ = new XferBuffers(*this, type, amd::alignUp(settings().stagedXferSize_, 4 * Ki));
|
||||
if ((xferWrite_ == nullptr) || !xferWrite_->create()) {
|
||||
LogError("Couldn't allocate transfer buffer objects for read");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize staged read buffers
|
||||
if (settings().stagedXferRead_) {
|
||||
xferRead_ = new XferBuffers(*this, Resource::Remote,
|
||||
|
||||
@@ -396,9 +396,6 @@ class Device : public NullDevice {
|
||||
pal::Memory* createScratchBuffer(size_t size //!< Size of buffer
|
||||
) const;
|
||||
|
||||
//! Returns transfer buffer object
|
||||
XferBuffers& xferWrite() const { return *xferWrite_; }
|
||||
|
||||
//! Returns transfer buffer object
|
||||
XferBuffers& xferRead() const { return *xferRead_; }
|
||||
|
||||
@@ -588,7 +585,6 @@ class Device : public NullDevice {
|
||||
amd::Monitor* scratchAlloc_; //!< Lock to serialise scratch allocation
|
||||
amd::Monitor* mapCacheOps_; //!< Lock to serialise cache for the map resources
|
||||
XferBuffers* xferRead_; //!< Transfer buffers read
|
||||
XferBuffers* xferWrite_; //!< Transfer buffers write
|
||||
std::vector<amd::Memory*>* mapCache_; //!< Map cache info structure
|
||||
ResourceCache* resourceCache_; //!< Resource cache
|
||||
uint numComputeEngines_; //!< The number of available compute engines
|
||||
|
||||
@@ -86,19 +86,19 @@ void Segment::copy(size_t offset, const void* src, size_t size) {
|
||||
if (cpuAccess_ != nullptr) {
|
||||
amd::Os::fastMemcpy(cpuAddress(offset), src, size);
|
||||
} else {
|
||||
amd::ScopedLock k(gpuAccess_->dev().xferMgr().lockXfer());
|
||||
VirtualGPU& gpu = *gpuAccess_->dev().xferQueue();
|
||||
Memory& xferBuf = gpuAccess_->dev().xferWrite().acquire();
|
||||
Memory& xferBuf = gpu.xferWrite().Acquire(size);
|
||||
size_t tmpSize = std::min(static_cast<size_t>(xferBuf.size()), size);
|
||||
size_t srcOffs = 0;
|
||||
while (size != 0) {
|
||||
amd::ScopedLock k(gpuAccess_->dev().xferMgr().lockXfer());
|
||||
xferBuf.hostWrite(&gpu, reinterpret_cast<const_address>(src) + srcOffs, 0, tmpSize);
|
||||
xferBuf.partialMemCopyTo(gpu, 0, (offset + srcOffs), tmpSize, *gpuAccess_, false, true);
|
||||
size -= tmpSize;
|
||||
srcOffs += tmpSize;
|
||||
tmpSize = std::min(static_cast<size_t>(xferBuf.size()), size);
|
||||
}
|
||||
gpuAccess_->dev().xferWrite().release(gpu, xferBuf);
|
||||
gpu.xferWrite().Release(xferBuf);
|
||||
gpu.waitAllEngines();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1046,8 +1046,8 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
|
||||
uint viewFlags = 0;
|
||||
Pal::ChannelMapping channels;
|
||||
Pal::ChNumFormat format = dev().getPalFormat(desc().format_, &channels);
|
||||
// Set the initial offset value for any resource to 0.
|
||||
// Note: Runtime can call create() more than once, if the initial memory type failed
|
||||
// Set the initial offset value for any resource to 0.
|
||||
// Note: Runtime can call create() more than once, if the initial memory type failed
|
||||
offset_ = 0;
|
||||
|
||||
// This is a thread safe operation
|
||||
@@ -1096,7 +1096,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
|
||||
if (!desc_.buffer_) {
|
||||
return CreateImage(params);
|
||||
}
|
||||
|
||||
|
||||
if (memoryType() == Pinned) {
|
||||
return CreatePinned(params);
|
||||
}
|
||||
@@ -1112,6 +1112,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
|
||||
offset_ += viewOwner_->offset();
|
||||
if (viewOwner_->data() != nullptr) {
|
||||
address_ = viewOwner_->data() + view->offset_;
|
||||
mapCount_++;
|
||||
}
|
||||
memRef_ = viewOwner_->memRef_;
|
||||
memRef_->retain();
|
||||
@@ -1177,11 +1178,6 @@ void Resource::free()
|
||||
return;
|
||||
}
|
||||
|
||||
// Sanity check for the map calls
|
||||
if ((mapCount_ != 0) && (memoryType() != Remote) &&
|
||||
(memoryType() != RemoteUSWC) && (memoryType() != Persistent)) {
|
||||
LogWarning("Resource wasn't unlocked, but destroyed!");
|
||||
}
|
||||
const bool wait =
|
||||
(memoryType() != ImageView) && (memoryType() != ImageBuffer) && (memoryType() != View);
|
||||
|
||||
@@ -1206,7 +1202,7 @@ void Resource::free()
|
||||
|
||||
// Destroy PAL resource
|
||||
if (iMem() != 0) {
|
||||
if (mapCount_ != 0) {
|
||||
if (mapCount_ != 0 && wait) {
|
||||
if ((memoryType() != Remote) && (memoryType() != RemoteUSWC)) {
|
||||
//! @note: This is a workaround for bad applications that don't unmap memory
|
||||
unmap(nullptr);
|
||||
@@ -1738,6 +1734,7 @@ void* Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers
|
||||
address_ = reinterpret_cast<uint8_t*>(memRef_->cpuAddress_) + subOffset_;
|
||||
} else {
|
||||
address_ = gpuMemoryMap(&desc_.pitch_, flags, iMem());
|
||||
address_ = reinterpret_cast<address>(address_) + offset_;
|
||||
}
|
||||
if (address_ == nullptr) {
|
||||
LogError("cal::ResMap failed!");
|
||||
|
||||
@@ -542,23 +542,6 @@ bool VirtualGPU::DmaFlushMgmt::isCbReady(VirtualGPU& gpu, uint64_t threads, uint
|
||||
return cbReady;
|
||||
}
|
||||
|
||||
void VirtualGPU::addXferWrite(Memory& memory) {
|
||||
if (xferWriteBuffers_.size() > 7) {
|
||||
dev().xferWrite().release(*this, *xferWriteBuffers_.front());
|
||||
xferWriteBuffers_.erase(xferWriteBuffers_.begin());
|
||||
}
|
||||
|
||||
// Delay destruction
|
||||
xferWriteBuffers_.push_back(&memory);
|
||||
}
|
||||
|
||||
void VirtualGPU::releaseXferWrite() {
|
||||
for (auto& memory : xferWriteBuffers_) {
|
||||
dev().xferWrite().release(*this, *memory);
|
||||
}
|
||||
xferWriteBuffers_.resize(0);
|
||||
}
|
||||
|
||||
void VirtualGPU::addPinnedMem(amd::Memory* mem) {
|
||||
if (nullptr == findPinnedMem(mem->getHostMem(), mem->getSize())) {
|
||||
if (pinnedMems_.size() > 7) {
|
||||
@@ -718,7 +701,8 @@ VirtualGPU::VirtualGPU(Device& device)
|
||||
printfDbgHSA_(nullptr),
|
||||
tsCache_(nullptr),
|
||||
dmaFlushMgmt_(device),
|
||||
writeBuffer_(nullptr),
|
||||
managedBuffer_(*this, device.settings().stagedXferSize_ + 32 * Ki),
|
||||
writeBuffer_(managedBuffer_, device.settings().stagedXferSize_),
|
||||
hwRing_(0),
|
||||
readjustTimeGPU_(0),
|
||||
lastTS_(nullptr),
|
||||
@@ -834,10 +818,7 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs,
|
||||
Unimplemented();
|
||||
}
|
||||
|
||||
writeBuffer_ = new ManagedBuffer(*this, dev().settings().stagedXferSize_);
|
||||
if ((writeBuffer_ == nullptr) || !writeBuffer_->create(Resource::RemoteUSWC)) {
|
||||
// We failed to create a constant buffer
|
||||
delete writeBuffer_;
|
||||
if (!managedBuffer_.create(Resource::RemoteUSWC)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -963,7 +944,7 @@ VirtualGPU::~VirtualGPU() {
|
||||
delete constBufs_[i];
|
||||
}
|
||||
|
||||
delete writeBuffer_;
|
||||
managedBuffer_.release();
|
||||
|
||||
//! @todo Temporarily keep the buffer mapped for debug purpose
|
||||
if (nullptr != schedParams_) {
|
||||
@@ -2758,9 +2739,6 @@ bool VirtualGPU::waitAllEngines(CommandBatch* cb) {
|
||||
earlyDone &= isDone(&events[i]);
|
||||
}
|
||||
|
||||
// Release all transfer buffers on this command queue
|
||||
releaseXferWrite();
|
||||
|
||||
// Release all pinned memory
|
||||
releasePinnedMem();
|
||||
|
||||
@@ -2813,14 +2791,14 @@ void VirtualGPU::waitEventLock(CommandBatch* cb) {
|
||||
}
|
||||
|
||||
bool VirtualGPU::allocConstantBuffers() {
|
||||
// Allocate constant buffers.
|
||||
// Allocate constant buffers.
|
||||
// Use double size, reported to the app to account for internal arguments
|
||||
const uint32_t MinCbSize = 2 * dev().info().maxParameterSize_;
|
||||
uint i;
|
||||
|
||||
// Create/reallocate constant buffer resources
|
||||
for (i = 0; i < MaxConstBuffersArguments; ++i) {
|
||||
ConstantBuffer* constBuf = new ConstantBuffer(*writeBuffer_, MinCbSize);
|
||||
ConstantBuffer* constBuf = new ConstantBuffer(managedBuffer_, MinCbSize);
|
||||
|
||||
if ((constBuf != nullptr) && constBuf->Create()) {
|
||||
addConstBuffer(constBuf);
|
||||
|
||||
@@ -375,8 +375,8 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
bool pfpaDoppCmd //!< is a submission for the pre-present primary
|
||||
);
|
||||
|
||||
//! Adds a stage write buffer into a list
|
||||
void addXferWrite(Memory& memory);
|
||||
//! Return xfer buffer for staging operations
|
||||
const XferBuffer& xferWrite() const { return writeBuffer_; }
|
||||
|
||||
//! Adds a pinned memory object into a map
|
||||
void addPinnedMem(amd::Memory* mem);
|
||||
@@ -518,9 +518,6 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
//! Allocates constant buffers
|
||||
bool allocConstantBuffers();
|
||||
|
||||
//! Releases stage write buffers
|
||||
void releaseXferWrite();
|
||||
|
||||
//! Allocate hsaQueueMem_
|
||||
bool allocHsaQueueMem();
|
||||
|
||||
@@ -594,11 +591,11 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
|
||||
DmaFlushMgmt dmaFlushMgmt_; //!< DMA flush management
|
||||
|
||||
std::vector<Memory*> xferWriteBuffers_; //!< Stage write buffers
|
||||
std::vector<amd::Memory*> pinnedMems_; //!< Pinned memory list
|
||||
|
||||
ManagedBuffer* writeBuffer_; //!< Managed write buffer
|
||||
constbufs_t constBufs_; //!< constant buffers
|
||||
ManagedBuffer managedBuffer_; //!< Managed write buffer
|
||||
constbufs_t constBufs_; //!< constant buffers
|
||||
XferBuffer writeBuffer_; //!< Transfer/staging buffer for uploads
|
||||
|
||||
typedef std::queue<CommandBatch*> CommandBatchQueue;
|
||||
CommandBatchQueue cbQueue_; //!< Queue of command batches
|
||||
|
||||
Ссылка в новой задаче
Block a user