Fix a deadlock in ROCr backend

When the OCL ROCr backend handles CL_MEM_COPY_HOST_PTR, it may attempt to access the amd::Memory object it is currently creating, which is not ready yet. The logic creates a temporary dummy object to perform the copy transfer. This change makes sure the runtime does not allocate the same device::Memory object a second time. Change-Id: I14c6a00a3941fdcaa6aea299e9f096e4c3f5cadf [ROCm/clr commit: 1fde842703]
2020-12-07 23:01:12 -05:00
@@ -1681,7 +1681,7 @@ Memory* Device::getRocMemory(amd::Memory* mem) const {
  return static_cast<roc::Memory*>(mem->getDeviceMemory(*this));
 }

-
+// ================================================================================================
 device::Memory* Device::createMemory(amd::Memory& owner) const {
  roc::Memory* memory = nullptr;
  if (owner.asBuffer()) {
@@ -1712,17 +1712,18 @@ device::Memory* Device::createMemory(amd::Memory& owner) const {
    // Pipe initialize in order read_idx, write_idx, end_idx. Refer clk_pipe_t structure.
    // Init with 3 DWORDS for 32bit addressing and 6 DWORDS for 64bit
    size_t pipeInit[3] = { 0, 0, owner.asPipe()->getMaxNumPackets() };
-    xferMgr().writeBuffer((void *)pipeInit, *memory, amd::Coord3D(0), amd::Coord3D(sizeof(pipeInit)));
+    xferMgr().writeBuffer(pipeInit, *memory, amd::Coord3D(0), amd::Coord3D(sizeof(pipeInit)));
  }

  // Transfer data only if OCL context has one device.
  // Cache coherency layer will update data for multiple devices
  if (!memory->isHostMemDirectAccess() && owner.asImage() && (owner.parent() == nullptr) &&
-      (owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) && (owner.getContext().devices().size() == 1)) {
+      (owner.getMemFlags() & CL_MEM_COPY_HOST_PTR) &&
+      (owner.getContext().devices().size() == 1)) {
    // To avoid recurssive call to Device::createMemory, we perform
-    // data transfer to the view of the image.
-    amd::Image* imageView = owner.asImage()->createView(
-        owner.getContext(), owner.asImage()->getImageFormat(), xferQueue());
+    // data transfer to the view of the image
+    amd::Image* imageView = owner.asImage()->createView(owner.getContext(),
+        owner.asImage()->getImageFormat(), xferQueue());

    if (imageView == nullptr) {
      LogError("[OCL] Fail to allocate view of image object");
@@ -1167,10 +1167,10 @@ Image* Image::createView(const Context& context, const Format& format, device::V
  // Find the image dimensions and create a corresponding object
  view = new (context) Image(format, *this, baseMipLevel, flags);

-  // Set GPU virtual device for this view
-  view->setVirtualDevice(vDev);
-
  if (view != nullptr) {
+    // Set GPU virtual device for this view
+    view->setVirtualDevice(vDev);
+
    view->resetAllocationState();

    // Initialize array of the device memory pointers
@@ -1178,7 +1178,14 @@ Image* Image::createView(const Context& context, const Format& format, device::V

    // Check if runtime has to allocate memory
    if ((context.devices().size() == 1) || DISABLE_DEFERRED_ALLOC) {
-      device::Memory* mem = view->getDeviceMemory(*context.devices()[0]);
+      for (uint i = 0; i < numDevices_; ++i) {
+        // Make sure the parent's device memory is available
+        if ((deviceMemories_[i].ref_ == context.devices()[i]) &&
+            (deviceMemories_[i].value_ != nullptr)) {
+          device::Memory* mem = view->getDeviceMemory(*context.devices()[i]);
+          break;
+        }
+      }
    }
  }