P4 to Git Change 1544622 by gandryey@gera-w8 on 2018/04/20 17:02:52

SWDEV-79445 - OCL generic changes and code clean-up
	- Add managed buffer support and replace all uploads with managed buffer allocations
	- Add a staging copy for small image writes
	- Also replace the constant buffer in FillBuffer with a managed buffer

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.cpp#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palblit.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.cpp#8 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palconstbuf.hpp#7 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#84 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#26 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palprogram.cpp#62 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#63 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#92 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#48 edit


[ROCm/clr commit: 392724cc3f]
Этот коммит содержится в:
foreman
2018-04-20 17:08:29 -04:00
родитель 5a10edc27a
Коммит 141d36d849
10 изменённых файлов: 175 добавлений и 148 удалений
+50 -63
Просмотреть файл
@@ -256,34 +256,36 @@ bool DmaBlitManager::readImage(device::Memory& srcMemory, void* dstHost, const a
bool DmaBlitManager::writeMemoryStaged(const void* srcHost, Memory& dstMemory, Memory& xferBuf,
size_t origin, size_t& offset, size_t& totalSize,
size_t xferSize) const {
amd::Coord3D src(0, 0, 0);
size_t chunkSize;
static const bool CopyRect = false;
// Flush DMA for ASYNC copy
// @todo Blocking write requires a flush to start earlier,
// but currently VDI doesn't provide that info
static const bool FlushDMA = false;
bool flushDMA = false;
if (dev().xferRead().bufSize() < 128 * Ki) {
chunkSize = dev().xferWrite().bufSize();
if (gpu().xferWrite().MaxSize() < 128 * Ki) {
chunkSize = gpu().xferWrite().MaxSize();
} else {
chunkSize = std::min(amd::alignUp(xferSize / 4, 256), dev().xferWrite().bufSize());
chunkSize = std::min(amd::alignUp(xferSize / 4, 256), gpu().xferWrite().MaxSize());
chunkSize = std::max(chunkSize, 128 * Ki);
bool flushDMA = true;
}
while (xferSize != 0) {
// Find the partial transfer size
size_t tmpSize = std::min(chunkSize, xferSize);
amd::Coord3D src(offset, 0, 0);
amd::Coord3D dst(origin + offset, 0, 0);
amd::Coord3D copySize(tmpSize, 0, 0);
// Copy data into the temporary buffer, using CPU
if (!xferBuf.hostWrite(&gpu(), reinterpret_cast<const char*>(srcHost) + offset, src, copySize)) {
if (!xferBuf.hostWrite(&gpu(), reinterpret_cast<const char*>(srcHost) + offset,
src, copySize, Resource::NoWait)) {
return false;
}
// Copy data into the original destination memory
if (!xferBuf.partialMemCopyTo(gpu(), src, dst, copySize, dstMemory, CopyRect, FlushDMA)) {
if (!xferBuf.partialMemCopyTo(gpu(), src, dst, copySize, dstMemory, CopyRect, flushDMA)) {
return false;
}
@@ -365,7 +367,7 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
}
if (dstSize != 0) {
Memory& xferBuf = dev().xferWrite().acquire();
Memory& xferBuf = gpu().xferWrite().Acquire(dstSize);
// Write memory using a staged resource
if (!writeMemoryStaged(srcHost, gpuMem(dstMemory), xferBuf, origin[0], offset, dstSize,
@@ -374,7 +376,7 @@ bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
return false;
}
gpu().addXferWrite(xferBuf);
gpu().xferWrite().Release(xferBuf);
}
}
@@ -392,7 +394,7 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
gpuMem(dstMemory).isPersistentDirectMap()) {
return HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
} else {
Memory& xferBuf = dev().xferWrite().acquire();
Memory& xferBuf = gpu().xferWrite().Acquire(std::min(gpu().xferWrite().MaxSize(), size[0]));
amd::Coord3D src(0, 0, 0);
size_t tmpSize = 0;
@@ -408,7 +410,7 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
while (dstSize != 0) {
// Find the partial transfer size
tmpSize = std::min(dev().xferWrite().bufSize(), dstSize);
tmpSize = std::min(gpu().xferWrite().MaxSize(), dstSize);
amd::Coord3D dst(bufOffset, 0, 0);
amd::Coord3D copySize(tmpSize, 0, 0);
@@ -432,7 +434,7 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
}
}
}
gpu().addXferWrite(xferBuf);
gpu().xferWrite().Release(xferBuf);
}
return true;
@@ -576,8 +578,8 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory
entire, rowPitch, slicePitch);
} else {
// Use PAL path for a transfer
result =
gpuMem(srcMemory).partialMemCopyTo(gpu(), srcOrigin, dstOrigin, size, gpuMem(dstMemory));
result = gpuMem(srcMemory).partialMemCopyTo(gpu(), srcOrigin, dstOrigin,
size, gpuMem(dstMemory));
// Check if a HostBlit transfer is required
if (completeOperation_ && !result) {
@@ -607,9 +609,8 @@ bool DmaBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMem
KernelBlitManager::KernelBlitManager(VirtualGPU& gpu, Setup setup)
: DmaBlitManager(gpu, setup),
program_(NULL),
constantBuffer_(NULL),
xferBufferSize_(0),
lockXferOps_(NULL) {
lockXferOps_("Transfer Ops Lock", true) {
for (uint i = 0; i < BlitTotal; ++i) {
kernels_[i] = NULL;
}
@@ -636,17 +637,11 @@ KernelBlitManager::~KernelBlitManager() {
context_->release();
}
if (NULL != constantBuffer_) {
constantBuffer_->release();
}
for (uint i = 0; i < MaxXferBuffers; ++i) {
if (NULL != xferBuffers_[i]) {
xferBuffers_[i]->release();
}
}
delete lockXferOps_;
}
bool KernelBlitManager::create(amd::Device& device) {
@@ -693,19 +688,6 @@ bool KernelBlitManager::createProgram(Device& device) {
result = true;
} while (!result);
// Create an internal constant buffer
constantBuffer_ = new (*context_) amd::Buffer(*context_, CL_MEM_ALLOC_HOST_PTR, 4 * Ki);
if ((constantBuffer_ != NULL) && !constantBuffer_->create(NULL)) {
constantBuffer_->release();
constantBuffer_ = NULL;
return false;
} else if (constantBuffer_ == NULL) {
return false;
}
// Assign the constant buffer to the current virtual GPU
constantBuffer_->setVirtualDevice(&gpu());
if (dev().settings().xferBufSize_ > 0) {
xferBufferSize_ = dev().settings().xferBufSize_;
@@ -734,11 +716,6 @@ bool KernelBlitManager::createProgram(Device& device) {
}
}
lockXferOps_ = new amd::Monitor("Transfer Ops Lock", true);
if (NULL == lockXferOps_) {
return false;
}
return result;
}
@@ -1685,30 +1662,43 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
} else {
size_t pinSize;
FindPinSize(pinSize, size, rowPitch, slicePitch, gpuMem(dstMemory));
size_t partial = 0;
bool pinned;
size_t partial;
amd::Memory* amdMemory = pinHostMemory(srcHost, pinSize, partial);
if (amdMemory == NULL) {
// Force SW copy
result = HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
entire);
synchronize();
return result;
amd::Memory* amdMemory = nullptr;
Memory* srcMemory;
if (pinSize > gpu().xferWrite().MaxSize()) {
amdMemory = pinHostMemory(srcHost, pinSize, partial);
if (amdMemory == nullptr) {
// Force SW copy
result = HostBlitManager::writeImage(srcHost, dstMemory,
origin, size, rowPitch, slicePitch, entire);
synchronize();
return result;
}
// Get device memory for this virtual device
srcMemory = dev().getGpuMemory(amdMemory);
pinned = true;
}
else {
srcMemory = &gpu().xferWrite().Acquire(pinSize);
srcMemory->hostWrite(&gpu(), srcHost, 0, pinSize, Resource::NoWait);
pinned = false;
}
// Readjust destination offset
const amd::Coord3D srcOrigin(partial);
// Get device memory for this virtual device
Memory* srcMemory = dev().getGpuMemory(amdMemory);
// Copy image to buffer
result = copyBufferToImage(*srcMemory, dstMemory, srcOrigin, origin, size, entire, rowPitch,
slicePitch);
// Add pinned memory for a later release
gpu().addPinnedMem(amdMemory);
if (pinned) {
// Add pinned memory for a later release
gpu().addPinnedMem(amdMemory);
} else {
gpu().xferWrite().Release(*srcMemory);
}
}
synchronize();
@@ -2054,14 +2044,12 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern,
setArgument(kernels_[fillType], 0, sizeof(cl_mem), &mem);
setArgument(kernels_[fillType], 1, sizeof(cl_mem), NULL);
}
Memory* gpuCB = dev().getGpuMemory(constantBuffer_);
if (gpuCB == NULL) {
return false;
}
void* constBuf = gpuCB->map(&gpu(), Resource::WriteOnly);
Memory& gpuCB = gpu().xferWrite().Acquire(patternSize);
void* constBuf = gpuCB.map(&gpu(), Resource::NoWait);
memcpy(constBuf, pattern, patternSize);
gpuCB->unmap(&gpu());
setArgument(kernels_[fillType], 2, sizeof(cl_mem), &gpuCB);
gpuCB.unmap(&gpu());
Memory* pGpuCB = &gpuCB;
setArgument(kernels_[fillType], 2, sizeof(cl_mem), &pGpuCB);
cl_ulong offset = origin[0];
if (dwordAligned) {
patternSize /= sizeof(uint32_t);
@@ -2077,6 +2065,7 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern,
// Execute the blit
address parameters = kernels_[fillType]->parameters().values();
result = gpu().submitKernelInternal(ndrange, *kernels_[fillType], parameters);
gpu().xferWrite().Release(gpuCB);
}
synchronize();
@@ -2137,12 +2126,10 @@ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& ds
setArgument(kernels_[blitType], 1, sizeof(cl_mem), &mem);
// Program source origin
cl_ulong srcOffset = srcOrigin[0] / CopyBuffAlignment[i];
;
setArgument(kernels_[blitType], 2, sizeof(srcOffset), &srcOffset);
// Program destination origin
cl_ulong dstOffset = dstOrigin[0] / CopyBuffAlignment[i];
;
setArgument(kernels_[blitType], 3, sizeof(dstOffset), &dstOffset);
cl_ulong copySize = size[0];
+2 -3
Просмотреть файл
@@ -352,7 +352,7 @@ class KernelBlitManager : public DmaBlitManager {
const void* data //!< Raw data pointer
) const;
virtual amd::Monitor* lockXfer() const { return lockXferOps_; }
virtual amd::Monitor* lockXfer() const { return &lockXferOps_; }
private:
static const size_t MaxXferBuffers = 2;
@@ -397,10 +397,9 @@ class KernelBlitManager : public DmaBlitManager {
amd::Program* program_; //!< GPU program obejct
amd::Kernel* kernels_[BlitTotal]; //!< GPU kernels for blit
amd::Memory* constantBuffer_; //!< An internal CB for blits
amd::Memory* xferBuffers_[MaxXferBuffers]; //!< Transfer buffers for images
size_t xferBufferSize_; //!< Transfer buffer size
amd::Monitor* lockXferOps_; //!< Lock transfer operation
mutable amd::Monitor lockXferOps_; //!< Lock transfer operation
};
static const char* BlitName[KernelBlitManager::BlitTotal] = {
+54 -5
Просмотреть файл
@@ -19,7 +19,7 @@ ManagedBuffer::ManagedBuffer(VirtualGPU& gpu, uint32_t size)
, wrtAddress_(nullptr) {}
// ================================================================================================
ManagedBuffer::~ManagedBuffer() {
void ManagedBuffer::release() {
for (auto it : buffers_) {
if (it->data() != nullptr) {
it->unmap(&gpu_);
@@ -72,13 +72,26 @@ address ManagedBuffer::reserve(uint32_t size, uint64_t* gpu_address) {
*gpu_address = buffers_[activeBuffer_]->vmAddress() + wrtOffset_;
address cpu_address = wrtAddress_ + wrtOffset_;
// Adjust the offset by the reserved size
wrtOffset_ += count;
return cpu_address;
}
// ================================================================================================
//! Switches to the next managed buffer in the ring and returns it as a whole,
//! with the write address/offset reset to the start of that buffer.
//! Note: the \a size argument is not read by this implementation.
Memory& ManagedBuffer::reserveAtTheTop(uint32_t size) {
  // Advance to the following buffer, wrapping around at the end of the list
  activeBuffer_ = (activeBuffer_ + 1) % MaxNumberOfBuffers;

  Memory& nextBuffer = *buffers_[activeBuffer_];

  // Make sure the selected buffer isn't busy before it is reused
  nextBuffer.wait(gpu_);

  // Restart writes from the beginning of the newly selected buffer
  wrtOffset_ = 0;
  wrtAddress_ = nextBuffer.data();

  return nextBuffer;
}
// ================================================================================================
ConstantBuffer::ConstantBuffer(ManagedBuffer& mbuf, uint32_t size)
: mbuf_(mbuf)
@@ -114,11 +127,47 @@ uint64_t ConstantBuffer::UploadDataToHw(uint32_t size) const {
// ================================================================================================
uint64_t ConstantBuffer::UploadDataToHw(const void* sysmem, uint32_t size) const {
uint64_t vm_address;
address cpu_address = mbuf_.reserve(size, &vm_address);
// Update memory with new CB data
memcpy(cpu_address, sysmem, size);
return vm_address;
}
// ================================================================================================
//! Stores a reference to the managed buffer and the size value passed in
XferBuffer::XferBuffer(ManagedBuffer& mbuf, uint32_t size) : mbuf_(mbuf), size_(size) {}
// ================================================================================================
Memory& XferBuffer::Acquire(uint32_t size) const
{
uint64_t vm_address;
// Reserve space in the managed buffer
address cpu_address = mbuf_.reserve(size, &vm_address);
// Update memory with new CB data
memcpy(cpu_address, sysmem, size);
return vm_address;
// Create a view for access
Memory* mem = new Memory(mbuf_.gpu().dev(), static_cast<size_t>(size));
Resource::ViewParams params = {};
params.gpu_ = &mbuf_.gpu();
params.offset_ = vm_address - mbuf_.vmAddress();
params.size_ = size;
params.resource_ = mbuf_.activeMemory();
if (nullptr == mem || !mem->create(Resource::View, &params)) {
delete mem;
// If the suballocation failed for some reason, then return the top of the active buffer
return mbuf_.reserveAtTheTop(size);
}
return *mem;
}
// ================================================================================================
//! Releases a memory object that was used for a staging transfer.
//! Only objects whose descriptor type is Resource::View are deleted;
//! any other memory object is left untouched.
void XferBuffer::Release(Memory& mem) const {
  const bool isView = (mem.desc().type_ == Resource::View);
  if (!isView) {
    // Not a view, so there is nothing to destroy here
    return;
  }
  // Delete view
  delete &mem;
}
} // namespace pal
+49 -8
Просмотреть файл
@@ -9,19 +9,20 @@
namespace pal {
//! Managed buffer (staging or constant)
class ManagedBuffer : public amd::HeapObject {
class ManagedBuffer : public amd::EmbeddedObject {
public:
//! Constructor for the ConstBuffer class
ManagedBuffer(VirtualGPU& gpu, //!< Virtual GPU device object
uint32_t size //!< size of the managed buffers in bytes
);
~ManagedBuffer() {}
//! Destructor for the ConstBuffer class
~ManagedBuffer();
//! Creates the real HW constant buffer
//! Creates the managed buffers
bool create(Resource::MemoryType type);
//! Release the managed buffers
void release();
/*! \brief Uploads current constant buffer data from sysMemCopy_ to HW
*
* \return True if the data upload was successful
@@ -29,6 +30,9 @@ class ManagedBuffer : public amd::HeapObject {
address reserve(uint32_t size, //!< real data size for upload
uint64_t* gpu_address);
//! Reserves memory at the top of the active buffer
Memory& reserveAtTheTop(uint32_t size);
//! Returns CB size
uint32_t size() const { return size_; }
@@ -40,6 +44,9 @@ class ManagedBuffer : public amd::HeapObject {
uint64_t vmAddress() const { return buffers_[activeBuffer_]->vmAddress(); }
//! Returns VirtualGPU object this managed resource associated
VirtualGPU& gpu() const { return gpu_; }
private:
//! The maximum number of the managed buffers
static constexpr uint32_t MaxNumberOfBuffers = 3;
@@ -63,13 +70,13 @@ class ConstantBuffer : public amd::HeapObject {
public:
//! Constructor for the ConstBuffer class
ConstantBuffer(ManagedBuffer& mbuf, //!< Managed buffer
uint32_t size
uint32_t size //!< Max size of the constant buffer
);
//! Destructor for the ConstBuffer class
~ConstantBuffer();
//! Creates the real HW constant buffer
//! Creates the HW constant buffer
bool Create();
/*! \brief Uploads current constant buffer data from sysMemCopy_ to HW
@@ -88,7 +95,7 @@ public:
) const;
//! Returns a pointer to the system memory copy for CB
address SysMemCopy(uint32_t size = 0) const { return sys_mem_copy_; }
address SysMemCopy() const { return sys_mem_copy_; }
//! Returns active GPU buffer
Memory* ActiveMemory() const { return mbuf_.activeMemory(); }
@@ -105,4 +112,38 @@ private:
uint32_t size_; //!< Constant buffer size
};
//! Staging (transfer) buffer that works on top of a managed buffer
class XferBuffer : public amd::EmbeddedObject {
 public:
  /*! \brief Constructor for the XferBuffer class
   *
   *  \param mbuf Managed buffer
   *  \param size Maximum size of the transfer buffer
   */
  XferBuffer(ManagedBuffer& mbuf, uint32_t size);

  //! Destructor for the XferBuffer class
  ~XferBuffer() {}

  /*! \brief Acquires free memory from the managed buffer
   *
   *  \param size Data size for transfers
   *  \return GPU memory object associated with free memory
   */
  Memory& Acquire(uint32_t size) const;

  /*! \brief Releases memory object used in the staging transfer
   *
   *  \param mem Memory object for release
   */
  void Release(Memory& mem) const;

  //! Returns the stored maximum transfer size as size_t
  size_t MaxSize() const { return size_; }

 private:
  //! Copy construction is disabled
  XferBuffer(const XferBuffer&) = delete;

  //! Copy assignment is disabled
  XferBuffer& operator=(const XferBuffer&) = delete;

  ManagedBuffer& mbuf_;  //!< Managed buffer on GPU
  uint32_t size_;        //!< Max staging buffer size
};
/*@}*/} // namespace pal
-17
Просмотреть файл
@@ -681,7 +681,6 @@ Device::Device()
scratchAlloc_(nullptr),
mapCacheOps_(nullptr),
xferRead_(nullptr),
xferWrite_(nullptr),
mapCache_(nullptr),
resourceCache_(nullptr),
numComputeEngines_(0),
@@ -732,7 +731,6 @@ Device::~Device() {
// Destroy temporary buffers for read/write
delete xferRead_;
delete xferWrite_;
// Destroy resource cache
delete resourceCache_;
@@ -986,21 +984,6 @@ bool Device::initializeHeapResources() {
}
if (settings().stagedXferSize_ != 0) {
// Initialize staged write buffers
if (settings().stagedXferWrite_) {
Resource::MemoryType type;
if (settings().stagingWritePersistent_ && !settings().disablePersistent_) {
type = Resource::Persistent;
} else {
type = Resource::RemoteUSWC;
}
xferWrite_ = new XferBuffers(*this, type, amd::alignUp(settings().stagedXferSize_, 4 * Ki));
if ((xferWrite_ == nullptr) || !xferWrite_->create()) {
LogError("Couldn't allocate transfer buffer objects for read");
return false;
}
}
// Initialize staged read buffers
if (settings().stagedXferRead_) {
xferRead_ = new XferBuffers(*this, Resource::Remote,
-4
Просмотреть файл
@@ -396,9 +396,6 @@ class Device : public NullDevice {
pal::Memory* createScratchBuffer(size_t size //!< Size of buffer
) const;
//! Returns transfer buffer object
XferBuffers& xferWrite() const { return *xferWrite_; }
//! Returns transfer buffer object
XferBuffers& xferRead() const { return *xferRead_; }
@@ -588,7 +585,6 @@ class Device : public NullDevice {
amd::Monitor* scratchAlloc_; //!< Lock to serialise scratch allocation
amd::Monitor* mapCacheOps_; //!< Lock to serialise cache for the map resources
XferBuffers* xferRead_; //!< Transfer buffers read
XferBuffers* xferWrite_; //!< Transfer buffers write
std::vector<amd::Memory*>* mapCache_; //!< Map cache info structure
ResourceCache* resourceCache_; //!< Resource cache
uint numComputeEngines_; //!< The number of available compute engines
+3 -3
Просмотреть файл
@@ -86,19 +86,19 @@ void Segment::copy(size_t offset, const void* src, size_t size) {
if (cpuAccess_ != nullptr) {
amd::Os::fastMemcpy(cpuAddress(offset), src, size);
} else {
amd::ScopedLock k(gpuAccess_->dev().xferMgr().lockXfer());
VirtualGPU& gpu = *gpuAccess_->dev().xferQueue();
Memory& xferBuf = gpuAccess_->dev().xferWrite().acquire();
Memory& xferBuf = gpu.xferWrite().Acquire(size);
size_t tmpSize = std::min(static_cast<size_t>(xferBuf.size()), size);
size_t srcOffs = 0;
while (size != 0) {
amd::ScopedLock k(gpuAccess_->dev().xferMgr().lockXfer());
xferBuf.hostWrite(&gpu, reinterpret_cast<const_address>(src) + srcOffs, 0, tmpSize);
xferBuf.partialMemCopyTo(gpu, 0, (offset + srcOffs), tmpSize, *gpuAccess_, false, true);
size -= tmpSize;
srcOffs += tmpSize;
tmpSize = std::min(static_cast<size_t>(xferBuf.size()), size);
}
gpuAccess_->dev().xferWrite().release(gpu, xferBuf);
gpu.xferWrite().Release(xferBuf);
gpu.waitAllEngines();
}
}
+6 -9
Просмотреть файл
@@ -1046,8 +1046,8 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
uint viewFlags = 0;
Pal::ChannelMapping channels;
Pal::ChNumFormat format = dev().getPalFormat(desc().format_, &channels);
// Set the initial offset value for any resource to 0.
// Note: Runtime can call create() more than once, if the initial memory type failed
// Set the initial offset value for any resource to 0.
// Note: Runtime can call create() more than once, if the initial memory type failed
offset_ = 0;
// This is a thread safe operation
@@ -1096,7 +1096,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
if (!desc_.buffer_) {
return CreateImage(params);
}
if (memoryType() == Pinned) {
return CreatePinned(params);
}
@@ -1112,6 +1112,7 @@ bool Resource::create(MemoryType memType, CreateParams* params) {
offset_ += viewOwner_->offset();
if (viewOwner_->data() != nullptr) {
address_ = viewOwner_->data() + view->offset_;
mapCount_++;
}
memRef_ = viewOwner_->memRef_;
memRef_->retain();
@@ -1177,11 +1178,6 @@ void Resource::free()
return;
}
// Sanity check for the map calls
if ((mapCount_ != 0) && (memoryType() != Remote) &&
(memoryType() != RemoteUSWC) && (memoryType() != Persistent)) {
LogWarning("Resource wasn't unlocked, but destroyed!");
}
const bool wait =
(memoryType() != ImageView) && (memoryType() != ImageBuffer) && (memoryType() != View);
@@ -1206,7 +1202,7 @@ void Resource::free()
// Destroy PAL resource
if (iMem() != 0) {
if (mapCount_ != 0) {
if (mapCount_ != 0 && wait) {
if ((memoryType() != Remote) && (memoryType() != RemoteUSWC)) {
//! @note: This is a workaround for bad applications that don't unmap memory
unmap(nullptr);
@@ -1738,6 +1734,7 @@ void* Resource::map(VirtualGPU* gpu, uint flags, uint startLayer, uint numLayers
address_ = reinterpret_cast<uint8_t*>(memRef_->cpuAddress_) + subOffset_;
} else {
address_ = gpuMemoryMap(&desc_.pitch_, flags, iMem());
address_ = reinterpret_cast<address>(address_) + offset_;
}
if (address_ == nullptr) {
LogError("cal::ResMap failed!");
+6 -28
Просмотреть файл
@@ -542,23 +542,6 @@ bool VirtualGPU::DmaFlushMgmt::isCbReady(VirtualGPU& gpu, uint64_t threads, uint
return cbReady;
}
//! Adds a staged-write buffer to the list kept for a delayed release.
//! If the list already holds more than 7 entries, the oldest entry is
//! released through dev().xferWrite() and removed before the new one is added.
void VirtualGPU::addXferWrite(Memory& memory) {
  const bool listIsFull = xferWriteBuffers_.size() > 7;
  if (listIsFull) {
    Memory* oldest = xferWriteBuffers_.front();
    dev().xferWrite().release(*this, *oldest);
    xferWriteBuffers_.erase(xferWriteBuffers_.begin());
  }

  // Delay destruction
  xferWriteBuffers_.push_back(&memory);
}
}
void VirtualGPU::releaseXferWrite() {
for (auto& memory : xferWriteBuffers_) {
dev().xferWrite().release(*this, *memory);
}
xferWriteBuffers_.resize(0);
}
void VirtualGPU::addPinnedMem(amd::Memory* mem) {
if (nullptr == findPinnedMem(mem->getHostMem(), mem->getSize())) {
if (pinnedMems_.size() > 7) {
@@ -718,7 +701,8 @@ VirtualGPU::VirtualGPU(Device& device)
printfDbgHSA_(nullptr),
tsCache_(nullptr),
dmaFlushMgmt_(device),
writeBuffer_(nullptr),
managedBuffer_(*this, device.settings().stagedXferSize_ + 32 * Ki),
writeBuffer_(managedBuffer_, device.settings().stagedXferSize_),
hwRing_(0),
readjustTimeGPU_(0),
lastTS_(nullptr),
@@ -834,10 +818,7 @@ bool VirtualGPU::create(bool profiling, uint deviceQueueSize, uint rtCUs,
Unimplemented();
}
writeBuffer_ = new ManagedBuffer(*this, dev().settings().stagedXferSize_);
if ((writeBuffer_ == nullptr) || !writeBuffer_->create(Resource::RemoteUSWC)) {
// We failed to create a constant buffer
delete writeBuffer_;
if (!managedBuffer_.create(Resource::RemoteUSWC)) {
return false;
}
@@ -963,7 +944,7 @@ VirtualGPU::~VirtualGPU() {
delete constBufs_[i];
}
delete writeBuffer_;
managedBuffer_.release();
//! @todo Temporarily keep the buffer mapped for debug purpose
if (nullptr != schedParams_) {
@@ -2758,9 +2739,6 @@ bool VirtualGPU::waitAllEngines(CommandBatch* cb) {
earlyDone &= isDone(&events[i]);
}
// Release all transfer buffers on this command queue
releaseXferWrite();
// Release all pinned memory
releasePinnedMem();
@@ -2813,14 +2791,14 @@ void VirtualGPU::waitEventLock(CommandBatch* cb) {
}
bool VirtualGPU::allocConstantBuffers() {
// Allocate constant buffers.
// Allocate constant buffers.
// Use double size, reported to the app to account for internal arguments
const uint32_t MinCbSize = 2 * dev().info().maxParameterSize_;
uint i;
// Create/reallocate constant buffer resources
for (i = 0; i < MaxConstBuffersArguments; ++i) {
ConstantBuffer* constBuf = new ConstantBuffer(*writeBuffer_, MinCbSize);
ConstantBuffer* constBuf = new ConstantBuffer(managedBuffer_, MinCbSize);
if ((constBuf != nullptr) && constBuf->Create()) {
addConstBuffer(constBuf);
+5 -8
Просмотреть файл
@@ -375,8 +375,8 @@ class VirtualGPU : public device::VirtualDevice {
bool pfpaDoppCmd //!< is a submission for the pre-present primary
);
//! Adds a stage write buffer into a list
void addXferWrite(Memory& memory);
//! Return xfer buffer for staging operations
const XferBuffer& xferWrite() const { return writeBuffer_; }
//! Adds a pinned memory object into a map
void addPinnedMem(amd::Memory* mem);
@@ -518,9 +518,6 @@ class VirtualGPU : public device::VirtualDevice {
//! Allocates constant buffers
bool allocConstantBuffers();
//! Releases stage write buffers
void releaseXferWrite();
//! Allocate hsaQueueMem_
bool allocHsaQueueMem();
@@ -594,11 +591,11 @@ class VirtualGPU : public device::VirtualDevice {
DmaFlushMgmt dmaFlushMgmt_; //!< DMA flush management
std::vector<Memory*> xferWriteBuffers_; //!< Stage write buffers
std::vector<amd::Memory*> pinnedMems_; //!< Pinned memory list
ManagedBuffer* writeBuffer_; //!< Managed write buffer
constbufs_t constBufs_; //!< constant buffers
ManagedBuffer managedBuffer_; //!< Managed write buffer
constbufs_t constBufs_; //!< constant buffers
XferBuffer writeBuffer_; //!< Transfer/staging buffer for uploads
typedef std::queue<CommandBatch*> CommandBatchQueue;
CommandBatchQueue cbQueue_; //!< Queue of command batches