From 1ec0ba35372d6fc89e85a0c194d2fa22b01368a5 Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Thu, 24 Aug 2023 17:36:00 +0000 Subject: [PATCH] SWDEV-301667 - Use large signal pool Use a large signal pool if a profiler is connected or profiling is force-enabled. This is needed to mitigate signal creation overhead when profiling, as signals are attached to every packet and deeper batches may show the overhead of signal allocation. Change-Id: I8034b8a20b55328b87d593bf044f59672f9653e8 --- rocclr/device/rocm/rocvirtual.cpp | 3 +++ rocclr/platform/commandqueue.cpp | 9 ++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/rocclr/device/rocm/rocvirtual.cpp b/rocclr/device/rocm/rocvirtual.cpp index 6450dd86ae..dce0dd55f1 100644 --- a/rocclr/device/rocm/rocvirtual.cpp +++ b/rocclr/device/rocm/rocvirtual.cpp @@ -341,6 +341,9 @@ VirtualGPU::HwQueueTracker::~HwQueueTracker() { // ================================================================================================ bool VirtualGPU::HwQueueTracker::Create() { uint kSignalListSize = ROC_SIGNAL_POOL_SIZE; + if (activity_prof::IsEnabled(OP_ID_DISPATCH) || gpu_.profiling_) { + kSignalListSize = !flagIsDefault(ROC_SIGNAL_POOL_SIZE) ? 
ROC_SIGNAL_POOL_SIZE : 4 * Ki; + } signal_list_.resize(kSignalListSize); hsa_agent_t agent = gpu_.gpu_device(); diff --git a/rocclr/platform/commandqueue.cpp b/rocclr/platform/commandqueue.cpp index 7bf95a2fc6..96829e4a4c 100644 --- a/rocclr/platform/commandqueue.cpp +++ b/rocclr/platform/commandqueue.cpp @@ -42,6 +42,9 @@ HostQueue::HostQueue(Context& context, Device& device, cl_command_queue_properti tail_(nullptr), isActive_(false), markerTsCount_(0) { + if (GPU_FORCE_QUEUE_PROFILING) { + properties().set(CL_QUEUE_PROFILING_ENABLE); + } if (AMD_DIRECT_DISPATCH) { // Initialize the queue thread_.Init(this); @@ -52,10 +55,6 @@ HostQueue::HostQueue(Context& context, Device& device, cl_command_queue_properti queueLock_.wait(); } } - - if (GPU_FORCE_QUEUE_PROFILING) { - properties().set(CL_QUEUE_PROFILING_ENABLE); - } } bool HostQueue::terminate() { @@ -239,7 +238,7 @@ void HostQueue::append(Command& command) { // Set last submitted command Command* prevLastEnqueueCommand = nullptr; - + // Attach only real commands and skip internal notifications for CPU queue if (command.waitingEvent() == nullptr) { command.retain();