From b301590fb99f9a6a488ce30b8ae1c9f556b29cd6 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Mon, 7 Dec 2020 23:01:12 -0500 Subject: [PATCH] Fix a deadlock in ROCr backend When the OCL ROCr backend performs CL_MEM_COPY_HOST_PTR, it may attempt to access the amd::Memory object it is currently creating, which is not ready yet. The logic creates a temporary dummy object to perform the copy transfer. This change makes sure the runtime skips allocating the same device::Memory object a second time. Change-Id: I14c6a00a3941fdcaa6aea299e9f096e4c3f5cadf [ROCm/clr commit: 1fde842703ac01234213019fca5f6441098d8745] --- projects/clr/rocclr/device/rocm/rocdevice.cpp | 13 +++++++------ projects/clr/rocclr/platform/memory.cpp | 15 +++++++++++---- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index 40adf2208e..7ed5ffc5e6 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -1681,7 +1681,7 @@ Memory* Device::getRocMemory(amd::Memory* mem) const { return static_cast(mem->getDeviceMemory(*this)); } - +// ================================================================================================ device::Memory* Device::createMemory(amd::Memory& owner) const { roc::Memory* memory = nullptr; if (owner.asBuffer()) { @@ -1712,17 +1712,18 @@ device::Memory* Device::createMemory(amd::Memory& owner) const { // Pipe initialize in order read_idx, write_idx, end_idx. Refer clk_pipe_t structure. // Init with 3 DWORDS for 32bit addressing and 6 DWORDS for 64bit size_t pipeInit[3] = { 0, 0, owner.asPipe()->getMaxNumPackets() }; - xferMgr().writeBuffer((void *)pipeInit, *memory, amd::Coord3D(0), amd::Coord3D(sizeof(pipeInit))); + xferMgr().writeBuffer(pipeInit, *memory, amd::Coord3D(0), amd::Coord3D(sizeof(pipeInit))); } // Transfer data only if OCL context has one device. 
// Cache coherency layer will update data for multiple devices if (!memory->isHostMemDirectAccess() && owner.asImage() && (owner.parent() == nullptr) && - (owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) && (owner.getContext().devices().size() == 1)) { + (owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) && + (owner.getContext().devices().size() == 1)) { // To avoid recurssive call to Device::createMemory, we perform - // data transfer to the view of the image. - amd::Image* imageView = owner.asImage()->createView( - owner.getContext(), owner.asImage()->getImageFormat(), xferQueue()); + // data transfer to the view of the image + amd::Image* imageView = owner.asImage()->createView(owner.getContext(), + owner.asImage()->getImageFormat(), xferQueue()); if (imageView == nullptr) { LogError("[OCL] Fail to allocate view of image object"); diff --git a/projects/clr/rocclr/platform/memory.cpp b/projects/clr/rocclr/platform/memory.cpp index 454b0c131c..940995e81c 100644 --- a/projects/clr/rocclr/platform/memory.cpp +++ b/projects/clr/rocclr/platform/memory.cpp @@ -1167,10 +1167,10 @@ Image* Image::createView(const Context& context, const Format& format, device::V // Find the image dimensions and create a corresponding object view = new (context) Image(format, *this, baseMipLevel, flags); - // Set GPU virtual device for this view - view->setVirtualDevice(vDev); - if (view != nullptr) { + // Set GPU virtual device for this view + view->setVirtualDevice(vDev); + view->resetAllocationState(); // Initialize array of the device memory pointers @@ -1178,7 +1178,14 @@ Image* Image::createView(const Context& context, const Format& format, device::V // Check if runtime has to allocate memory if ((context.devices().size() == 1) || DISABLE_DEFERRED_ALLOC) { - device::Memory* mem = view->getDeviceMemory(*context.devices()[0]); + for (uint i = 0; i < numDevices_; ++i) { + // Make sure the parent's device memory is available + if ((deviceMemories_[i].ref_ == context.devices()[i]) && + 
(deviceMemories_[i].value_ != nullptr)) { + device::Memory* mem = view->getDeviceMemory(*context.devices()[i]); + break; + } + } } }