P4 to Git Change 1561015 by gandryey@gera-w8 on 2018/05/29 13:10:06

SWDEV-79445 - OCL generic changes and code clean-up Optimize memory dependency tracking logic: 1. Add modified_ field to the event object to track memory writes into device memory. 2. Check memory dependency only if a write operation is currently requested or previously performed. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldefs.hpp#35 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.cpp#66 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palresource.hpp#24 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#100 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#52 edit
2018-05-29 13:18:29 -04:00
commit 51a4d1865c
@@ -44,22 +44,27 @@ struct HwDbgKernelInfo {
 enum EngineType { MainEngine = 0, SdmaEngine, AllEngines };

 struct GpuEvent {
-  static const unsigned int InvalidID = ((1 << 30) - 1);
+  static constexpr uint32_t InvalidID = ((1 << 30) - 1);

  struct {
-    uint32_t id : 31;         ///< actual event id
-    uint32_t engineId_ : 1;   ///< type of the id
+    uint32_t id_ : 30;        ///< Actual event id
+    uint32_t modified_ : 1;   ///< Resource associated with the event was modified
+    uint32_t engineId_ : 1;   ///< Type of the id
  };
  //! GPU event default constructor
-  GpuEvent() : id(InvalidID), engineId_(MainEngine) {}
+  GpuEvent() : id_(InvalidID), engineId_(MainEngine), modified_(false) {}
  //! GPU event constructor
-  GpuEvent(uint evt) : id(evt), engineId_(MainEngine) {}
+  GpuEvent(uint evt) : id_(evt), engineId_(MainEngine), modified_(false) {}

  //! Returns true if the current event is valid
-  bool isValid() const { return (id != InvalidID) ? true : false; }
+  bool isValid() const { return (id_ != InvalidID) ? true : false; }

  //! Set invalid event id
-  void invalidate() { id = InvalidID; }
+  void invalidate() { id_ = InvalidID; }
+
+  // Custom assignment operator: copies id_ and engineId_ only, so this object's modified_ field is preserved
+  GpuEvent& operator=(const GpuEvent& evt)
+    { id_ = evt.id_; engineId_ = evt.engineId_; return *this; }
 };

 /*! \addtogroup PAL
@@ -1665,6 +1665,31 @@ GpuEvent* Resource::getGpuEvent(const VirtualGPU& gpu) const {
  return &events_[idx];
 }

+// ================================================================================================
+void Resource::setModified(VirtualGPU& gpu, bool modified) const {
+  uint idx = gpu.index();
+  assert(idx < events_.size());
+  events_[idx].modified_ = modified;
+
+  // If current resource is a view, then update the parent as well
+  if (viewOwner_ != nullptr) {
+    viewOwner_->setModified(gpu, modified);
+  }
+}
+
+// ================================================================================================
+bool Resource::isModified(VirtualGPU& gpu) const {
+  uint idx = gpu.index();
+  assert(idx < events_.size());
+  bool modified = events_[idx].modified_;
+
+  // If current resource is a view, then get the parent state as well
+  if (viewOwner_ != nullptr) {
+    modified |= viewOwner_->isModified(gpu);
+  }
+  return modified;
+}
+
 // ================================================================================================
 void Resource::palFree() const {
  if (desc().type_ == OGLInterop) {
@@ -382,6 +382,12 @@ class Resource : public amd::HeapObject {
    }
  }

+  //! Update the modified field of the event, meaning the resource was updated
+  void setModified(VirtualGPU& gpu, bool modified) const;
+
+  //! Returns true if the resource (or its view owner) is marked as modified for the given GPU
+  bool isModified(VirtualGPU& gpu) const;
+
 protected:
  /*! \brief Creates a PAL iamge object, associated with the resource
  *
@@ -427,24 +427,26 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
  uint64_t curStart = memory->vmAddress();
  uint64_t curEnd = curStart + memory->size();

-  // Loop through all memory objects in the queue and find dependency
-  // @note don't include objects from the current kernel
-  for (size_t j = 0; j < endMemObjectsInQueue_; ++j) {
-    // Check if the queue already contains this mem object and
-    // GPU operations aren't readonly
-    uint64_t busyStart = memObjectsInQueue_[j].start_;
-    uint64_t busyEnd = memObjectsInQueue_[j].end_;
+  if (memory->isModified(gpu) || !readOnly) {
+    // Loop through all memory objects in the queue and find dependency
+    // @note don't include objects from the current kernel
+    for (size_t j = 0; j < endMemObjectsInQueue_; ++j) {
+      // Check if the queue already contains this mem object and
+      // GPU operations aren't readonly
+      uint64_t busyStart = memObjectsInQueue_[j].start_;
+      uint64_t busyEnd = memObjectsInQueue_[j].end_;

-    // Check if the start inside the busy region
-    if ((((curStart >= busyStart) && (curStart < busyEnd)) ||
-         // Check if the end inside the busy region
-         ((curEnd > busyStart) && (curEnd <= busyEnd)) ||
-         // Check if the start/end cover the busy region
-         ((curStart <= busyStart) && (curEnd >= busyEnd))) &&
-        // If the buys region was written or the current one is for write
-        (!memObjectsInQueue_[j].readOnly_ || !readOnly)) {
-      flushL1Cache = true;
-      break;
+      // Check if the start inside the busy region
+      if ((((curStart >= busyStart) && (curStart < busyEnd)) ||
+           // Check if the end inside the busy region
+           ((curEnd > busyStart) && (curEnd <= busyEnd)) ||
+           // Check if the start/end cover the busy region
+           ((curStart <= busyStart) && (curEnd >= busyEnd))) &&
+          // If the busy region was written or the current one is for write
+          (!memObjectsInQueue_[j].readOnly_ || !readOnly)) {
+        flushL1Cache = true;
+        break;
+      }
    }
  }

@@ -471,6 +473,8 @@ void VirtualGPU::MemoryDependency::validate(VirtualGPU& gpu, const Memory* memor
  memObjectsInQueue_[numMemObjectsInQueue_].end_ = curEnd;
  memObjectsInQueue_[numMemObjectsInQueue_].readOnly_ = readOnly;
  numMemObjectsInQueue_++;
+  // Mark resource as modified
+  memory->setModified(gpu, !readOnly);
 }

 void VirtualGPU::MemoryDependency::clear(bool all) {
@@ -1955,7 +1959,7 @@ void VirtualGPU::PostDeviceEnqueue(
    uint64_t vmParentWrap,
    GpuEvent* gpuEvent)
 {
-  uint32_t id  = gpuEvent->id;
+  uint32_t id  = gpuEvent->id_;
  amd::DeviceQueue* defQueue = kernel.program().context().defDeviceQueue(dev());

  // Make sure exculsive access to the device queue
@@ -2036,7 +2040,7 @@ void VirtualGPU::PostDeviceEnqueue(
    iCmd()->CmdVirtualQueueHandshake(vmParentWrap + offsetof(AmdAqlWrap, state), AQL_WRAP_DONE,
      vmParentWrap + offsetof(AmdAqlWrap, child_counter),
      signalAddr, dev().settings().useDeviceQueue_);
-    if (id != gpuEvent->id) {
+    if (id != gpuEvent->id_) {
        LogError("Something is wrong. ID mismatch!\n");
    }
    eventEnd(MainEngine, *gpuEvent);
@@ -2133,7 +2137,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const

  for (int iter = 0; iter < iteration; ++iter) {
    GpuEvent gpuEvent(queues_[MainEngine]->cmdBufId());
-    uint32_t id = gpuEvent.id;
+    uint32_t id = gpuEvent.id_;
    // Reset global size for dimension dim if split is needed
    if (dim != -1) {
      newOffset[dim] = sizes.offset()[dim] + globalStep * iter;
@@ -2184,7 +2188,7 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
    if (profiling() || state_.profileEnabled_) {
      addBarrier();
    }
-    if (id != gpuEvent.id) {
+    if (id != gpuEvent.id_) {
      LogError("Something is wrong. ID mismatch!\n");
    }
    eventEnd(MainEngine, gpuEvent);
@@ -463,11 +463,11 @@ class VirtualGPU : public device::VirtualDevice {
    constexpr bool End = false;
    if (forceExec) {
      constexpr bool ForceFlush = true;
-      event.id = queues_[engId]->submit(ForceFlush);
+      event.id_ = queues_[engId]->submit(ForceFlush);
      profileEvent(engId, End);
    } else {
      profileEvent(engId, End);
-      event.id = queues_[engId]->submit(GPU_FLUSH_ON_EXECUTION);
+      event.id_ = queues_[engId]->submit(GPU_FLUSH_ON_EXECUTION);
    }
    event.engineId_ = engId;
  }
@@ -475,7 +475,7 @@ class VirtualGPU : public device::VirtualDevice {
  void waitForEvent(GpuEvent* event) const {
    if (event->isValid()) {
      assert(event->engineId_ < AllEngines);
-      queues_[event->engineId_]->waitForEvent(event->id);
+      queues_[event->engineId_]->waitForEvent(event->id_);
      event->invalidate();
    }
  }
@@ -483,7 +483,7 @@ class VirtualGPU : public device::VirtualDevice {
  bool isDone(GpuEvent* event) {
    if (event->isValid()) {
      assert(event->engineId_ < AllEngines);
-      if (queues_[event->engineId_]->isDone(event->id)) {
+      if (queues_[event->engineId_]->isDone(event->id_)) {
        event->invalidate();
        return true;
      }
@@ -623,9 +623,8 @@ class VirtualGPU : public device::VirtualDevice {
 };

 inline void VirtualGPU::addVmMemory(const Memory* memory) {
-  GpuEvent event(queues_[MainEngine]->cmdBufId());
  queues_[MainEngine]->addCmdMemRef(memory->memRef());
-  memory->setBusy(*this, event);
+  memory->setBusy(*this, queues_[MainEngine]->cmdBufId());
 }

 inline void VirtualGPU::AddKernel(const amd::Kernel& kernel) const {