Fix a deadlock in ROCr backend

When the OCL ROCr backend handles CL_MEM_COPY_HOST_PTR, it may attempt
to access the amd::Memory object it is currently creating, which is
not ready yet. The logic creates a temporary dummy object to perform
the copy transfer. This change makes sure the runtime skips allocating
the same device::Memory object a second time.

Change-Id: I14c6a00a3941fdcaa6aea299e9f096e4c3f5cadf


[ROCm/clr commit: 1fde842703]
此提交包含在:
German Andryeyev
2020-12-07 23:01:12 -05:00
父節點 caa75dd35f
當前提交 b301590fb9
共有 2 個檔案被更改,包括 18 行新增10 行删除
+7 -6
查看文件
@@ -1681,7 +1681,7 @@ Memory* Device::getRocMemory(amd::Memory* mem) const {
return static_cast<roc::Memory*>(mem->getDeviceMemory(*this));
}
// ================================================================================================
device::Memory* Device::createMemory(amd::Memory& owner) const {
roc::Memory* memory = nullptr;
if (owner.asBuffer()) {
@@ -1712,17 +1712,18 @@ device::Memory* Device::createMemory(amd::Memory& owner) const {
// Pipe initialize in order read_idx, write_idx, end_idx. Refer clk_pipe_t structure.
// Init with 3 DWORDS for 32bit addressing and 6 DWORDS for 64bit
size_t pipeInit[3] = { 0, 0, owner.asPipe()->getMaxNumPackets() };
xferMgr().writeBuffer((void *)pipeInit, *memory, amd::Coord3D(0), amd::Coord3D(sizeof(pipeInit)));
xferMgr().writeBuffer(pipeInit, *memory, amd::Coord3D(0), amd::Coord3D(sizeof(pipeInit)));
}
// Transfer data only if OCL context has one device.
// Cache coherency layer will update data for multiple devices
if (!memory->isHostMemDirectAccess() && owner.asImage() && (owner.parent() == nullptr) &&
(owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) && (owner.getContext().devices().size() == 1)) {
(owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) &&
(owner.getContext().devices().size() == 1)) {
// To avoid recursive call to Device::createMemory, we perform
// data transfer to the view of the image.
amd::Image* imageView = owner.asImage()->createView(
owner.getContext(), owner.asImage()->getImageFormat(), xferQueue());
// data transfer to the view of the image
amd::Image* imageView = owner.asImage()->createView(owner.getContext(),
owner.asImage()->getImageFormat(), xferQueue());
if (imageView == nullptr) {
LogError("[OCL] Fail to allocate view of image object");
+11 -4
查看文件
@@ -1167,10 +1167,10 @@ Image* Image::createView(const Context& context, const Format& format, device::V
// Find the image dimensions and create a corresponding object
view = new (context) Image(format, *this, baseMipLevel, flags);
// Set GPU virtual device for this view
view->setVirtualDevice(vDev);
if (view != nullptr) {
// Set GPU virtual device for this view
view->setVirtualDevice(vDev);
view->resetAllocationState();
// Initialize array of the device memory pointers
@@ -1178,7 +1178,14 @@ Image* Image::createView(const Context& context, const Format& format, device::V
// Check if runtime has to allocate memory
if ((context.devices().size() == 1) || DISABLE_DEFERRED_ALLOC) {
device::Memory* mem = view->getDeviceMemory(*context.devices()[0]);
for (uint i = 0; i < numDevices_; ++i) {
// Make sure the parent's device memory is available
if ((deviceMemories_[i].ref_ == context.devices()[i]) &&
(deviceMemories_[i].value_ != nullptr)) {
device::Memory* mem = view->getDeviceMemory(*context.devices()[i]);
break;
}
}
}
}