From 488cfd467cd05b07b8b376981fbd29ef733b4ad0 Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Wed, 11 Jun 2025 21:00:13 +0000 Subject: [PATCH] rocr: Always send free scratch notifications Always send notification to profiler tools when scratch memory is freed. --- .../core/runtime/amd_aql_queue.cpp | 49 ++++++++++++++++--- .../core/runtime/amd_gpu_agent.cpp | 4 +- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp index 600b674bbb..5ed4abd651 100644 --- a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp @@ -362,8 +362,20 @@ AqlQueue::~AqlQueue() { Inactivate(); - if (queue_scratch_.main_queue_base) agent_->ReleaseQueueMainScratch(queue_scratch_); - if (queue_scratch_.alt_queue_base) agent_->ReleaseQueueAltScratch(queue_scratch_); + if (queue_scratch_.main_queue_base) { + tool::notify_event_scratch_free_start(public_handle(), + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE); + agent_->ReleaseQueueMainScratch(queue_scratch_); + tool::notify_event_scratch_free_end(public_handle(), + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE); + } + if (queue_scratch_.alt_queue_base) { + tool::notify_event_scratch_free_start(public_handle(), + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT); + agent_->ReleaseQueueAltScratch(queue_scratch_); + tool::notify_event_scratch_free_end(public_handle(), + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT); + } exception_signal_->WaitingDec(); exception_signal_->DestroySignal(); @@ -642,7 +654,13 @@ void AqlQueue::CheckScratchLimits() { void AqlQueue::FreeMainScratchSpace() { auto& scratch = queue_scratch_; - agent_->ReleaseQueueMainScratch(scratch); + if (queue_scratch_.main_queue_base) { + tool::notify_event_scratch_free_start(public_handle(), + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE); + agent_->ReleaseQueueMainScratch(scratch); + tool::notify_event_scratch_free_end(public_handle(), + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE); + } scratch.main_size = 0; scratch.main_size_per_thread = 0; scratch.main_queue_process_offset = 0; @@ -785,7 +803,13 @@ void AqlQueue::AsyncReclaimMainScratch() { void AqlQueue::FreeAltScratchSpace() { auto& scratch = queue_scratch_; - agent_->ReleaseQueueAltScratch(scratch); + if (queue_scratch_.alt_queue_base) { + tool::notify_event_scratch_free_start(public_handle(), + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT); + agent_->ReleaseQueueAltScratch(scratch); + tool::notify_event_scratch_free_end(public_handle(), + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT); + } scratch.alt_size = 0; scratch.alt_size_per_thread = 0; scratch.alt_queue_process_offset = 0; @@ -987,7 +1011,13 @@ void AqlQueue::HandleInsufficientScratch(hsa_signal_value_t& error_code, // scratch.use_alt_limit will be 0 if alt scratch is not supported or disabled if (dispatch_size < scratch.use_alt_limit && dispatch_slots < device_slots) { // Try to use ALT scratch - agent_->ReleaseQueueAltScratch(scratch); + if (scratch.alt_queue_base) { + tool::notify_event_scratch_free_start(public_handle(), + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT); + agent_->ReleaseQueueAltScratch(scratch); + tool::notify_event_scratch_free_end(public_handle(), + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT); + } scratch.alt_size = dispatch_size; scratch.alt_size_per_thread = size_per_thread; @@ -1019,7 +1049,14 @@ void AqlQueue::HandleInsufficientScratch(hsa_signal_value_t& error_code, } // Use PRIMARY scratch - agent_->ReleaseQueueMainScratch(scratch); + if (scratch.main_queue_base) { + tool::notify_event_scratch_free_start(public_handle(), + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE); + agent_->ReleaseQueueMainScratch(scratch); + tool::notify_event_scratch_free_end(public_handle(), + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_NONE); + } + scratch.main_size = device_size; scratch.main_size_per_thread = size_per_thread; scratch.main_lanes_per_wave = lanes_per_wave; diff --git a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 2d5b20667a..27ae3ccba6 100644 --- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -1972,7 +1972,7 @@ void GpuAgent::AcquireQueueMainScratch(ScratchInfo& scratch) { /* Should be called with scratch_lock_ */ void GpuAgent::ReleaseQueueMainScratch(ScratchInfo& scratch) { - if (scratch.main_queue_base == nullptr) return; + assert(scratch.main_queue_base); scratch_cache_.freeMain(scratch); scratch.main_queue_base = nullptr; @@ -2047,7 +2047,7 @@ void GpuAgent::AcquireQueueAltScratch(ScratchInfo& scratch) { /* Should be called with scratch_lock_ */ void GpuAgent::ReleaseQueueAltScratch(ScratchInfo& scratch) { - if (scratch.alt_queue_base == nullptr) return; + assert(scratch.alt_queue_base); scratch_cache_.freeAlt(scratch); scratch.alt_queue_base = nullptr;