From c759986e28ef66491054f0be4eae1685cb1708a3 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Wed, 8 Dec 2021 15:35:54 -0500 Subject: [PATCH] SWDEV-300515 - Apply CU granularity on the CU count Report granularity for possible app query Change-Id: I98857c6f4cc7ae590927ea35ce57d181abe7860b [ROCm/clr commit: f613831471df8de0868f46a6d0217ffabe22c2fd] --- projects/clr/rocclr/device/device.hpp | 2 ++ projects/clr/rocclr/device/pal/paldevice.cpp | 5 ++++- projects/clr/rocclr/device/pal/palvirtual.cpp | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index ef2288819c..0c298977d8 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -521,6 +521,8 @@ struct Info : public amd::EmbeddedObject { uint32_t numRTQueues_; //! Number of available real time compute units uint32_t numRTCUs_; + //! The granularity at which compute units can be dedicated to a queue + uint32_t granularityRTCUs_; //! Thread trace enable uint32_t threadTraceEnable_; diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp index 5ca95a2e4f..27da193b92 100644 --- a/projects/clr/rocclr/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/device/pal/paldevice.cpp @@ -596,7 +596,10 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp, info_.numAsyncQueues_ = numComputeRings; info_.numRTQueues_ = numExclusiveComputeRings; - info_.numRTCUs_ = palProp.engineProperties[Pal::EngineTypeCompute].maxNumDedicatedCu; + + const auto& engineProp = palProp.engineProperties[Pal::EngineTypeCompute]; + info_.numRTCUs_ = engineProp.maxNumDedicatedCu; + info_.granularityRTCUs_ = engineProp.dedicatedCuGranularity; info_.threadTraceEnable_ = settings().threadTraceEnable_; diff --git a/projects/clr/rocclr/device/pal/palvirtual.cpp b/projects/clr/rocclr/device/pal/palvirtual.cpp index 5f87f19dfc..c94a6afe14 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/device/pal/palvirtual.cpp @@ -87,7 +87,8 @@ VirtualGPU::Queue* VirtualGPU::Queue::Create(const VirtualGPU& gpu, Pal::QueueTy cmdCreateInfo.engineType = qCreateInfo.engineType = Pal::EngineTypeCompute; qCreateInfo.priority = Pal::QueuePriority::Medium; } else if (amd::CommandQueue::RealTimeDisabled != rtCU) { - qCreateInfo.numReservedCu = rtCU; + qCreateInfo.numReservedCu = amd::alignDown(rtCU, + gpu.dev().properties().engineProperties[Pal::EngineTypeCompute].dedicatedCuGranularity); if ((priority == amd::CommandQueue::Priority::Medium) && // If Windows HWS is enabled, then the both real time queues are allocated // on the same engine