diff --git a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 08950dda94..de865f9775 100644 --- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -963,6 +963,7 @@ void GpuAgent::AcquireQueueScratch(ScratchInfo& scratch) { bool large = (scratch.size > single_limit) || (scratch_pool_.size() - scratch_pool_.remaining() + scratch.size > small_limit); large = (isa_->GetMajorVersion() < 8) ? false : large; + large = core::Runtime::runtime_singleton_->flag().no_scratch_reclaim() ? false : large; if (large) scratch.queue_base = scratch_pool_.alloc_high(scratch.size); else diff --git a/runtime/hsa-runtime/core/util/flag.h b/runtime/hsa-runtime/core/util/flag.h index 7f6dc9fc58..68ec90a70e 100644 --- a/runtime/hsa-runtime/core/util/flag.h +++ b/runtime/hsa-runtime/core/util/flag.h @@ -106,6 +106,9 @@ class Flag { var = os::GetEnvVar("HSA_FORCE_FINE_GRAIN_PCIE"); fine_grain_pcie_ = (var == "1") ? true : false; + + var = os::GetEnvVar("HSA_NO_SCRATCH_RECLAIM"); + no_scratch_reclaim_ = (var == "1") ? true : false; } bool check_flat_scratch() const { return check_flat_scratch_; } @@ -130,6 +133,8 @@ class Flag { bool fine_grain_pcie() const { return fine_grain_pcie_; } + bool no_scratch_reclaim() const { return no_scratch_reclaim_; } + std::string enable_sdma() const { return enable_sdma_; } std::string visible_gpus() const { return visible_gpus_; } @@ -152,6 +157,7 @@ class Flag { bool disable_fragment_alloc_; bool rev_copy_dir_; bool fine_grain_pcie_; + bool no_scratch_reclaim_; std::string enable_sdma_;