P4 to Git Change 1580311 by gandryey@gera-w8 on 2018/07/13 14:24:01

SWDEV-154474 - [Task]: TAN Dual Realtime Queue Enhancement For Radeon Rays - Add support for the second RT queue Affected files ... ... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_command.cpp#16 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#422 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#97 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#32 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#114 edit [ROCm/clr commit: 6b409a027b]
2018-07-13 14:32:29 -04:00
@@ -422,10 +422,15 @@ bool VirtualGPU::create(bool profiling, uint rtCUs, uint deviceQueueSize,
                                          : (dev().getFirstAvailableComputeEngineID() + idx)));

      } else {
-        if (priority == amd::CommandQueue::Priority::Medium) {
+        if ((priority == amd::CommandQueue::Priority::Medium) &&
+            (amd::CommandQueue::RealTimeDisabled == rtCUs)) {
          engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_MEDIUM_PRIORITY));
        } else {
-          engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_RT));
+          if (priority == amd::CommandQueue::Priority::Medium) {
+            engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_RT));
+          } else {
+            engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_RT));
+          }
        }
        //!@todo This is not a generic solution and
        // may have issues with > 8 queues
@@ -717,7 +717,6 @@ Device::Device()
      mapCache_(nullptr),
      resourceCache_(nullptr),
      numComputeEngines_(0),
-      numExclusiveComputeEngines_(0),
      numDmaEngines_(0),
      heapInitComplete_(false),
      xferQueue_(nullptr),
@@ -822,11 +821,18 @@ bool Device::create(Pal::IDevice* device) {
  if (properties().engineProperties[Pal::EngineTypeExclusiveCompute].maxNumDedicatedCu > 0) {
    for (uint i = 0; i < properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineCount;
         ++i) {
-      if ((properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] ==
-           Pal::EngineSubType::RtCuHighCompute) ||
-          (properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] ==
-           Pal::EngineSubType::RtCuMedCompute)) {
-        numExclusiveComputeEngines_++;
+      if (properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] ==
+          Pal::EngineSubType::RtCuHighCompute) {
+        if (exclusiveComputeEnginesId_.find(ExclusiveQueueType::RealTime0) !=
+            exclusiveComputeEnginesId_.end()) {
+          exclusiveComputeEnginesId_.insert({ExclusiveQueueType::RealTime1, i});
+        } else {
+          exclusiveComputeEnginesId_.insert({ExclusiveQueueType::RealTime0, i});
+        }
+      }
+      if (properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] ==
+          Pal::EngineSubType::RtCuMedCompute) {
+        exclusiveComputeEnginesId_.insert({ExclusiveQueueType::Medium, i});
      }
    }
  }
@@ -976,9 +982,10 @@ bool Device::initializeHeapResources() {
    // Request all compute engines
    finalizeInfo.requestedEngineCounts[Pal::EngineTypeCompute].engines =
        ((1 << numComputeEngines_) - 1);
-    // Request real time compute engines
-    finalizeInfo.requestedEngineCounts[Pal::EngineTypeExclusiveCompute].engines =
-        ((1 << numExclusiveComputeEngines_) - 1);
+    for (const auto& it: exclusiveComputeEnginesId_) {
+      // Request every discovered exclusive compute engine (real time and medium priority)
+      finalizeInfo.requestedEngineCounts[Pal::EngineTypeExclusiveCompute].engines |= (1 << it.second);
+    }
    // Request all SDMA engines
    finalizeInfo.requestedEngineCounts[Pal::EngineTypeDma].engines = (1 << numDmaEngines_) - 1;

@@ -155,7 +155,11 @@ class ThreadTrace;
 #ifndef CL_FILTER_NONE
 #define CL_FILTER_NONE 0x1142
 #endif
-
+enum class ExclusiveQueueType : uint32_t {
+  RealTime0 = 0,  //!< First RtCuHighCompute engine found by Device::create
+  RealTime1,      //!< A further RtCuHighCompute engine, once RealTime0 is taken
+  Medium          //!< RtCuMedCompute engine
+};
 class Sampler : public device::Sampler {
 public:
  //! Constructor
@@ -400,7 +404,10 @@ class Device : public NullDevice {
  uint numComputeEngines() const { return numComputeEngines_; }

  //! Returns the number of available compute rings
-  uint numExclusiveComputeEngines() const { return numExclusiveComputeEngines_; }
+  uint numExclusiveComputeEngines() const { return exclusiveComputeEnginesId_.size(); }
+
+  //! Returns the map from exclusive queue type to its exclusive compute engine index
+  const auto& exclusiveComputeEnginesId() const { return exclusiveComputeEnginesId_; }

  //! Returns the number of available DMA engines
  uint numDMAEngines() const { return numDmaEngines_; }
@@ -583,7 +590,7 @@ class Device : public NullDevice {
  std::vector<amd::Memory*>* mapCache_;  //!< Map cache info structure
  ResourceCache* resourceCache_;         //!< Resource cache
  uint numComputeEngines_;               //!< The number of available compute engines
-  uint numExclusiveComputeEngines_;      //!< The number of available compute engines
+  std::map<ExclusiveQueueType, uint32_t> exclusiveComputeEnginesId_;//!< Exclusive queue type -> index of the matching exclusive compute engine
  uint numDmaEngines_;                   //!< The number of available compute engines
  bool heapInitComplete_;                //!< Keep track of initialization status of heap resources
  VirtualGPU* xferQueue_;                //!< Transfer queue
@@ -50,17 +50,30 @@ VirtualGPU::Queue* VirtualGPU::Queue::Create(const VirtualGPU& gpu, Pal::QueueTy
  } else {
    cmdCreateInfo.engineType = qCreateInfo.engineType = Pal::EngineTypeCompute;
  }
-
-  if (priority == amd::CommandQueue::Priority::Medium) {
-    qCreateInfo.engineIndex = 0x1;
+  std::map<ExclusiveQueueType, uint32_t>::const_iterator it;
+  if ((priority == amd::CommandQueue::Priority::Medium) &&
+      (amd::CommandQueue::RealTimeDisabled == rtCU)) {
+    it = gpu.dev().exclusiveComputeEnginesId().find(ExclusiveQueueType::Medium);
    cmdCreateInfo.engineType = qCreateInfo.engineType = Pal::EngineTypeExclusiveCompute;
  } else if (amd::CommandQueue::RealTimeDisabled != rtCU) {
    qCreateInfo.numReservedCu = rtCU;
-    qCreateInfo.engineIndex = 0x0;
+    if (priority == amd::CommandQueue::Priority::Medium) {
+      it = gpu.dev().exclusiveComputeEnginesId().find(ExclusiveQueueType::RealTime1);
+    } else {
+      it = gpu.dev().exclusiveComputeEnginesId().find(ExclusiveQueueType::RealTime0);
+    }
    cmdCreateInfo.engineType = qCreateInfo.engineType = Pal::EngineTypeExclusiveCompute;
    cmdCreateInfo.flags.realtimeComputeUnits = true;
  }
-
+  // If the app creates an exclusive compute queue, look up its engine index; fail if none was found
+  if (qCreateInfo.engineType == Pal::EngineTypeExclusiveCompute) {
+    if (it != gpu.dev().exclusiveComputeEnginesId().end()) {
+      qCreateInfo.engineIndex = it->second;
+    }
+    else {
+      return nullptr;
+    }
+  }
  // Find queue object size
  size_t qSize = palDev->GetQueueSize(qCreateInfo, &result);
  if (result != Pal::Result::Success) {