diff --git a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp index 4b4b0cb747..5306663a2b 100644 --- a/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/gpu/gpuvirtual.cpp @@ -422,10 +422,15 @@ bool VirtualGPU::create(bool profiling, uint rtCUs, uint deviceQueueSize, : (dev().getFirstAvailableComputeEngineID() + idx))); } else { - if (priority == amd::CommandQueue::Priority::Medium) { + if ((priority == amd::CommandQueue::Priority::Medium) && + (amd::CommandQueue::RealTimeDisabled == rtCUs)) { engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_MEDIUM_PRIORITY)); } else { - engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_RT)); + if (priority == amd::CommandQueue::Priority::Medium) { + engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_RT)); + } else { + engineMask = dev().engines().getMask((gslEngineID)(GSL_ENGINEID_COMPUTE_RT)); + } } //!@todo This is not a generic solution and // may have issues with > 8 queues diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp index e533330c0e..8a01000cca 100644 --- a/projects/clr/rocclr/runtime/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/runtime/device/pal/paldevice.cpp @@ -717,7 +717,6 @@ Device::Device() mapCache_(nullptr), resourceCache_(nullptr), numComputeEngines_(0), - numExclusiveComputeEngines_(0), numDmaEngines_(0), heapInitComplete_(false), xferQueue_(nullptr), @@ -822,11 +821,18 @@ bool Device::create(Pal::IDevice* device) { if (properties().engineProperties[Pal::EngineTypeExclusiveCompute].maxNumDedicatedCu > 0) { for (uint i = 0; i < properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineCount; ++i) { - if ((properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] == - Pal::EngineSubType::RtCuHighCompute) || - 
(properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] == - Pal::EngineSubType::RtCuMedCompute)) { - numExclusiveComputeEngines_++; + if (properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] == + Pal::EngineSubType::RtCuHighCompute) { + if (exclusiveComputeEnginesId_.find(ExclusiveQueueType::RealTime0) != + exclusiveComputeEnginesId_.end()) { + exclusiveComputeEnginesId_.insert({ExclusiveQueueType::RealTime1, i}); + } else { + exclusiveComputeEnginesId_.insert({ExclusiveQueueType::RealTime0, i}); + } + } + if (properties().engineProperties[Pal::EngineTypeExclusiveCompute].engineSubType[i] == + Pal::EngineSubType::RtCuMedCompute) { + exclusiveComputeEnginesId_.insert({ExclusiveQueueType::Medium, i}); } } } @@ -976,9 +982,10 @@ bool Device::initializeHeapResources() { // Request all compute engines finalizeInfo.requestedEngineCounts[Pal::EngineTypeCompute].engines = ((1 << numComputeEngines_) - 1); - // Request real time compute engines - finalizeInfo.requestedEngineCounts[Pal::EngineTypeExclusiveCompute].engines = - ((1 << numExclusiveComputeEngines_) - 1); + for (const auto& it: exclusiveComputeEnginesId_) { + // Request real time compute engines + finalizeInfo.requestedEngineCounts[Pal::EngineTypeExclusiveCompute].engines |= (1 << it.second); + } // Request all SDMA engines finalizeInfo.requestedEngineCounts[Pal::EngineTypeDma].engines = (1 << numDmaEngines_) - 1; diff --git a/projects/clr/rocclr/runtime/device/pal/paldevice.hpp b/projects/clr/rocclr/runtime/device/pal/paldevice.hpp index 50a7aaf4cc..bb25529240 100644 --- a/projects/clr/rocclr/runtime/device/pal/paldevice.hpp +++ b/projects/clr/rocclr/runtime/device/pal/paldevice.hpp @@ -155,7 +155,11 @@ class ThreadTrace; #ifndef CL_FILTER_NONE #define CL_FILTER_NONE 0x1142 #endif - +enum class ExclusiveQueueType : uint32_t { + RealTime0 = 0, + RealTime1, + Medium +}; class Sampler : public device::Sampler { public: //! 
Constructor @@ -400,7 +404,10 @@ class Device : public NullDevice { uint numComputeEngines() const { return numComputeEngines_; } //! Returns the number of available compute rings - uint numExclusiveComputeEngines() const { return numExclusiveComputeEngines_; } + uint numExclusiveComputeEngines() const { return exclusiveComputeEnginesId_.size(); } + + //! Returns the map of exclusive queue types to exclusive compute engine indices + const auto& exclusiveComputeEnginesId() const { return exclusiveComputeEnginesId_; } //! Returns the number of available DMA engines uint numDMAEngines() const { return numDmaEngines_; } @@ -583,7 +590,7 @@ class Device : public NullDevice { std::vector* mapCache_; //!< Map cache info structure ResourceCache* resourceCache_; //!< Resource cache uint numComputeEngines_; //!< The number of available compute engines - uint numExclusiveComputeEngines_; //!< The number of available compute engines + std::map exclusiveComputeEnginesId_;//!< Exclusive compute engine index per exclusive queue type uint numDmaEngines_; //!< The number of available compute engines bool heapInitComplete_; //!< Keep track of initialization status of heap resources VirtualGPU* xferQueue_; //!< Transfer queue diff --git a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp index 9d35f0485f..79789efde9 100644 --- a/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/runtime/device/pal/palvirtual.cpp @@ -50,17 +50,30 @@ VirtualGPU::Queue* VirtualGPU::Queue::Create(const VirtualGPU& gpu, Pal::QueueTy } else { cmdCreateInfo.engineType = qCreateInfo.engineType = Pal::EngineTypeCompute; } - - if (priority == amd::CommandQueue::Priority::Medium) { - qCreateInfo.engineIndex = 0x1; + std::map::const_iterator it; + if ((priority == amd::CommandQueue::Priority::Medium) && + (amd::CommandQueue::RealTimeDisabled == rtCU)) { + it = gpu.dev().exclusiveComputeEnginesId().find(ExclusiveQueueType::Medium); cmdCreateInfo.engineType = 
qCreateInfo.engineType = Pal::EngineTypeExclusiveCompute; } else if (amd::CommandQueue::RealTimeDisabled != rtCU) { qCreateInfo.numReservedCu = rtCU; - qCreateInfo.engineIndex = 0x0; + if (priority == amd::CommandQueue::Priority::Medium) { + it = gpu.dev().exclusiveComputeEnginesId().find(ExclusiveQueueType::RealTime1); + } else { + it = gpu.dev().exclusiveComputeEnginesId().find(ExclusiveQueueType::RealTime0); + } cmdCreateInfo.engineType = qCreateInfo.engineType = Pal::EngineTypeExclusiveCompute; cmdCreateInfo.flags.realtimeComputeUnits = true; } - + // If the app creates an exclusive compute, then find the engine id + if (qCreateInfo.engineType == Pal::EngineTypeExclusiveCompute) { + if (it != gpu.dev().exclusiveComputeEnginesId().end()) { + qCreateInfo.engineIndex = it->second; + } + else { + return nullptr; + } + } // Find queue object size size_t qSize = palDev->GetQueueSize(qCreateInfo, &result); if (result != Pal::Result::Success) {