From 7d0b0b6d2cf1aa4b5bcd4f26acf8cf9f3b992dcc Mon Sep 17 00:00:00 2001
From: foreman
Date: Fri, 13 Jul 2018 14:32:29 -0400
Subject: [PATCH] P4 to Git Change 1580311 by gandryey@gera-w8 on 2018/07/13
14:24:01
SWDEV-154474 - [Task]: TAN Dual Realtime Queue Enhancement For Radeon Rays
- Add support for the second RT queue
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_command.cpp#16 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#422 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.cpp#97 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/paldevice.hpp#32 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#114 edit
[ROCm/clr commit: 6b409a027b55777858a7dd7fb3098924665e4ca3]
---
.../rocclr/runtime/device/gpu/gpuvirtual.cpp | 9 +++++--
.../rocclr/runtime/device/pal/paldevice.cpp | 25 ++++++++++++-------
.../rocclr/runtime/device/pal/paldevice.hpp | 13 +++++++---
.../rocclr/runtime/device/pal/palvirtual.cpp | 23 +++++++++++++----
4 files changed, 51 insertions(+), 19 deletions(-)
diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
index 4b4b0cb747..5306663a2b 100644
--- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp
@@ -422,10 +422,15 @@ bool VirtualGPU::create(bool profiling, uint rtCUs, uint deviceQueueSize,
: (dev().getFirstAvailableComputeEngineID() + idx)));
} else {
- if (priority == amd::CommandQueue::Priority::Medium) {
+ if ((priority == amd::CommandQueue::Priority::Medium) &&
+ (amd::CommandQueue::RealTimeDisabled == rtCUs)) {
engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_MEDIUM_PRIORITY));
} else {
- engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_RT));
+ if (priority == amd::CommandQueue::Priority::Medium) {
+ engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_RT));
+ } else {
+ engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_RT));
+ }
}
//!@todo This is not a generic solution and
// may have issues with > 8 queues
diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
index e533330c0e..8a01000cca 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp
@@ -717,7 +717,6 @@ Device::Device()
mapCache_(nullptr),
resourceCache_(nullptr),
numComputeEngines_(0),
- numExclusiveComputeEngines_(0),
numDmaEngines_(0),
heapInitComplete_(false),
xferQueue_(nullptr),
@@ -822,11 +821,18 @@ bool Device::create(Pal::IDevice* device) {
if (properties().engineProperties[Pal::EngineTypeExclusiveCompute].maxNumDedicatedCu > 0) {
for (uint i = 0; i < properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineCount;
++i) {
- if ((properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] ==
- Pal::EngineSubType::RtCuHighCompute) ||
- (properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] ==
- Pal::EngineSubType::RtCuMedCompute)) {
- numExclusiveComputeEngines_++;
+ if (properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] ==
+ Pal::EngineSubType::RtCuHighCompute) {
+ if (exclusiveComputeEnginesId_.find(ExclusiveQueueType::RealTime0) !=
+ exclusiveComputeEnginesId_.end()) {
+ exclusiveComputeEnginesId_.insert({ExclusiveQueueType::RealTime1, i});
+ } else {
+ exclusiveComputeEnginesId_.insert({ExclusiveQueueType::RealTime0, i});
+ }
+ }
+ if (properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] ==
+ Pal::EngineSubType::RtCuMedCompute) {
+ exclusiveComputeEnginesId_.insert({ExclusiveQueueType::Medium, i});
}
}
}
@@ -976,9 +982,10 @@ bool Device::initializeHeapResources() {
// Request all compute engines
finalizeInfo.requestedEngineCounts[Pal::EngineTypeCompute].engines =
((1 << numComputeEngines_) - 1);
- // Request real time compute engines
- finalizeInfo.requestedEngineCounts[Pal::EngineTypeExclusiveCompute].engines =
- ((1 << numExclusiveComputeEngines_) - 1);
+ for (const auto& it: exclusiveComputeEnginesId_) {
+ // Request every discovered exclusive compute engine (real time and medium priority)
+ finalizeInfo.requestedEngineCounts[Pal::EngineTypeExclusiveCompute].engines |= (1 << it.second);
+ }
// Request all SDMA engines
finalizeInfo.requestedEngineCounts[Pal::EngineTypeDma].engines = (1 << numDmaEngines_) - 1;
diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.hpp b/projects/clr/rocclr/runtime/device/pal/paldevice.hpp
index 50a7aaf4cc..bb25529240 100644
--- a/projects/clr/rocclr/runtime/device/pal/paldevice.hpp
+++ b/projects/clr/rocclr/runtime/device/pal/paldevice.hpp
@@ -155,7 +155,11 @@ class ThreadTrace;
#ifndef CL_FILTER_NONE
#define CL_FILTER_NONE 0x1142
#endif
-
+enum class ExclusiveQueueType : uint32_t {  //!< Keys of Device::exclusiveComputeEnginesId_
+ RealTime0 = 0,  //!< First RtCuHighCompute exclusive engine found during Device::create()
+ RealTime1,  //!< RtCuHighCompute exclusive engine found after RealTime0 was already registered
+ Medium  //!< RtCuMedCompute exclusive engine
+};
class Sampler : public device::Sampler {
public:
//! Constructor
@@ -400,7 +404,10 @@ class Device : public NullDevice {
uint numComputeEngines() const { return numComputeEngines_; }
//! Returns the number of available compute rings
- uint numExclusiveComputeEngines() const { return numExclusiveComputeEngines_; }
+ uint numExclusiveComputeEngines() const { return exclusiveComputeEnginesId_.size(); }
+
+ //! Returns the map from exclusive queue type to exclusive compute engine index
+ const auto& exclusiveComputeEnginesId() const { return exclusiveComputeEnginesId_; }
//! Returns the number of available DMA engines
uint numDMAEngines() const { return numDmaEngines_; }
@@ -583,7 +590,7 @@ class Device : public NullDevice {
std::vector* mapCache_; //!< Map cache info structure
ResourceCache* resourceCache_; //!< Resource cache
uint numComputeEngines_; //!< The number of available compute engines
- uint numExclusiveComputeEngines_; //!< The number of available compute engines
+ std::map exclusiveComputeEnginesId_;//!< Exclusive compute engine index for each ExclusiveQueueType
uint numDmaEngines_; //!< The number of available compute engines
bool heapInitComplete_; //!< Keep track of initialization status of heap resources
VirtualGPU* xferQueue_; //!< Transfer queue
diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
index 9d35f0485f..79789efde9 100644
--- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp
@@ -50,17 +50,30 @@ VirtualGPU::Queue* VirtualGPU::Queue::Create(const VirtualGPU& gpu, Pal::QueueTy
} else {
cmdCreateInfo.engineType = qCreateInfo.engineType = Pal::EngineTypeCompute;
}
-
- if (priority == amd::CommandQueue::Priority::Medium) {
- qCreateInfo.engineIndex = 0x1;
+ std::map::const_iterator it;
+ if ((priority == amd::CommandQueue::Priority::Medium) &&
+ (amd::CommandQueue::RealTimeDisabled == rtCU)) {
+ it = gpu.dev().exclusiveComputeEnginesId().find(ExclusiveQueueType::Medium);
cmdCreateInfo.engineType = qCreateInfo.engineType = Pal::EngineTypeExclusiveCompute;
} else if (amd::CommandQueue::RealTimeDisabled != rtCU) {
qCreateInfo.numReservedCu = rtCU;
- qCreateInfo.engineIndex = 0x0;
+ if (priority == amd::CommandQueue::Priority::Medium) {
+ it = gpu.dev().exclusiveComputeEnginesId().find(ExclusiveQueueType::RealTime1);
+ } else {
+ it = gpu.dev().exclusiveComputeEnginesId().find(ExclusiveQueueType::RealTime0);
+ }
cmdCreateInfo.engineType = qCreateInfo.engineType = Pal::EngineTypeExclusiveCompute;
cmdCreateInfo.flags.realtimeComputeUnits = true;
}
-
+ // If the app requests an exclusive compute queue, use the engine index looked up above; fail if the device has none
+ if (qCreateInfo.engineType == Pal::EngineTypeExclusiveCompute) {
+ if (it != gpu.dev().exclusiveComputeEnginesId().end()) {
+ qCreateInfo.engineIndex = it->second;
+ }
+ else {
+ return nullptr;
+ }
+ }
// Find queue object size
size_t qSize = palDev->GetQueueSize(qCreateInfo, &result);
if (result != Pal::Result::Success) {