From f3e1db176a5c16783f8971fdd27831d0844fdc8e Mon Sep 17 00:00:00 2001 From: Sunday Clement <83687182+Sundance636@users.noreply.github.com> Date: Mon, 22 Sep 2025 09:39:00 -0400 Subject: [PATCH] rocrtst: Reduce host memory limit to 70% (#905) * rocrtst: Reduce host memory limit to 70% Reducing the upper bound for rocrtstFunc.Memory_Max_Mem to 70% from 90% to help reduce test execution time. Signed-off-by: Sunday Clement * rocrtst: Add ROCRTST_LIMIT_POOL_SIZE env var Add environment variable to override the memory pool sizes when running tests. Co-authored-by: David Yat Sin --------- Signed-off-by: Sunday Clement Co-authored-by: David Yat Sin --- projects/rocr-runtime/rocrtst/common/common.cc | 13 +++++++++++++ projects/rocr-runtime/rocrtst/common/common.h | 2 ++ .../rocrtst/suites/functional/memory_basic.cc | 13 +++++++++---- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/projects/rocr-runtime/rocrtst/common/common.cc b/projects/rocr-runtime/rocrtst/common/common.cc index b6650e9775..fca47d12ba 100644 --- a/projects/rocr-runtime/rocrtst/common/common.cc +++ b/projects/rocr-runtime/rocrtst/common/common.cc @@ -62,6 +62,8 @@ namespace rocrtst { } \ } +size_t pool_size_limit = 0; + static hsa_status_t FindAgent(hsa_agent_t agent, void* data, hsa_device_type_t dev_type) { assert(data != nullptr); @@ -405,6 +407,17 @@ hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool, const size_t max_pool_size = 2*1024*1024*1024UL; pool_i->size = std::min(pool_i->size, max_pool_size); #endif + pool_size_limit = 0; + char *pool_size_limit_str = getenv("ROCRTST_LIMIT_POOL_SIZE"); + if (pool_size_limit_str) { + char *end; + pool_size_limit = strtoul(pool_size_limit_str, &end, 10); + if (pool_size_limit > pool_i->size) { + std::cout << "Warning: Pool size override > than reported size (override:" + << pool_size_limit << " reported:" << pool_i->size << ")" << std::endl; + } + pool_i->size = pool_size_limit; + } err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, diff --git a/projects/rocr-runtime/rocrtst/common/common.h b/projects/rocr-runtime/rocrtst/common/common.h index abca31ca75..7591319f64 100644 --- a/projects/rocr-runtime/rocrtst/common/common.h +++ b/projects/rocr-runtime/rocrtst/common/common.h @@ -110,6 +110,8 @@ struct agent_pools_t{ std::vector pools; }; +extern size_t pool_size_limit; + /// Fill in the pool_info_t structure for the provided pool. /// \param[in] pool Pool for which information will be retrieved /// \param[out] pool_i Pointer to structure where pool info will be stored diff --git a/projects/rocr-runtime/rocrtst/suites/functional/memory_basic.cc b/projects/rocr-runtime/rocrtst/suites/functional/memory_basic.cc index dbb12bb5d1..99d4f39976 100644 --- a/projects/rocr-runtime/rocrtst/suites/functional/memory_basic.cc +++ b/projects/rocr-runtime/rocrtst/suites/functional/memory_basic.cc @@ -237,10 +237,15 @@ void MemoryTest::MaxSingleAllocationTest(hsa_agent_t ag, std::min(pool_sz, info.totalram / gran_sz) : pool_sz; - // Reduce upper_bound by 10% for system-RAM. Otherwise Linux OOM-Killer app can be triggered, - // if system has allocated all available physical memory and swap space, and so killing this - // process. - uint64_t upper_bound = (ag_type == HSA_DEVICE_TYPE_CPU) ? (pool_sz * 0.90) : pool_sz; + // Reduce upper_bound by 30% or 10% for system-RAM, depending on pool size limit. Otherwise + // Linux OOM-Killer app can be triggered if system has allocated all available physical + // memory and swap space, and so killing this process. + float pool_size_limit_ratio = 1.0; + if (ag_type == HSA_DEVICE_TYPE_CPU) { + pool_size_limit_ratio = rocrtst::pool_size_limit ? 0.9 : 0.7; + } + + uint64_t upper_bound = pool_size_limit_ratio * pool_sz; uint64_t lower_bound = 0; auto max_alloc_size = upper_bound;