diff --git a/runtime/hsa-runtime/core/inc/amd_gpu_agent.h b/runtime/hsa-runtime/core/inc/amd_gpu_agent.h index 938c87ef7f..359080c06e 100644 --- a/runtime/hsa-runtime/core/inc/amd_gpu_agent.h +++ b/runtime/hsa-runtime/core/inc/amd_gpu_agent.h @@ -343,6 +343,8 @@ class GpuAgent : public GpuAgentInt { // @brief returns true if agent uses MES scheduler __forceinline const bool isMES() const { return (isa_->GetMajorVersion() >= 11) ? true : false; }; + void ReserveScratch(); + void Trim() override; const std::function& diff --git a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp index f8ff419881..b09ea82a79 100644 --- a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp @@ -285,6 +285,9 @@ AqlQueue::AqlQueue(GpuAgent* agent, size_t req_size_pkts, HSAuint32 node_id, Scr queue_id_ = queue_rsrc.QueueId; MAKE_NAMED_SCOPE_GUARD(QueueGuard, [&]() { hsaKmtDestroyQueue(queue_id_); }); + // On the first queue creation, reserve some scratch memory on this agent. + agent_->ReserveScratch(); + // Initialize scratch memory related entities queue_scratch_.queue_retry = amd_queue_.queue_inactive_signal; InitScratchSRD(); diff --git a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index c695ba4116..9ccbf6b893 100644 --- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -499,9 +499,12 @@ void GpuAgent::InitScratchPool() { } else { new (&scratch_pool_) SmallHeap(); } +} +void GpuAgent::ReserveScratch() +{ size_t reserved_sz = core::Runtime::runtime_singleton_->flag().scratch_single_limit(); - if (reserved_sz) { + if (!scratch_cache_.reserved_bytes() && reserved_sz) { HSAuint64 alt_va; void* reserved_base = scratch_pool_.alloc(reserved_sz); assert(reserved_base && "Could not allocate reserved memory");