rocrtst: Reduce host memory limit to 70% (#905)

* rocrtst: Reduce host memory limit to 70%

Reduce the upper bound for rocrtstFunc.Memory_Max_Mem from 90% to 70%
to help reduce test execution time.

Signed-off-by: Sunday Clement <Sunday.Clement@amd.com>

* rocrtst: Add ROCRTST_LIMIT_POOL_SIZE env var

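Add the ROCRTST_LIMIT_POOL_SIZE environment variable to override the
reported memory pool sizes when running tests. When it is set to a
non-zero value, the host memory upper bound is 90% instead of 70%.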

Co-authored-by: David Yat Sin <David.YatSin@amd.com>

---------

Signed-off-by: Sunday Clement <Sunday.Clement@amd.com>
Co-authored-by: David Yat Sin <David.YatSin@amd.com>
This commit is contained in:
Sunday Clement
2025-09-22 09:39:00 -04:00
committed by GitHub
parent 9b350754cc
commit f3e1db176a
3 changed files with 24 additions and 4 deletions
@@ -62,6 +62,8 @@ namespace rocrtst {
} \
}
// Pool size override parsed from the ROCRTST_LIMIT_POOL_SIZE environment variable in
// AcquirePoolInfo(). It is 0 when the variable is not set (or parses as 0); tests read
// it to decide how much of the host pool they may allocate.
size_t pool_size_limit = 0;
static hsa_status_t FindAgent(hsa_agent_t agent, void* data,
hsa_device_type_t dev_type) {
assert(data != nullptr);
@@ -405,6 +407,17 @@ hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool,
const size_t max_pool_size = 2*1024*1024*1024UL;
pool_i->size = std::min(pool_i->size, max_pool_size);
#endif
pool_size_limit = 0;
char *pool_size_limit_str = getenv("ROCRTST_LIMIT_POOL_SIZE");
if (pool_size_limit_str) {
char *end;
pool_size_limit = strtoul(pool_size_limit_str, &end, 10);
if (pool_size_limit > pool_i->size) {
std::cout << "Warning: Pool size override > than reported size (override:"
<< pool_size_limit << " reported:" << pool_i->size << ")" << std::endl;
}
pool_i->size = pool_size_limit;
}
err = hsa_amd_memory_pool_get_info(pool,
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
@@ -110,6 +110,8 @@ struct agent_pools_t{
std::vector<hsa_amd_memory_pool_t> pools;
};
/// Pool size override taken from the ROCRTST_LIMIT_POOL_SIZE environment variable.
/// Updated by AcquirePoolInfo(); 0 when no override is in effect.
extern size_t pool_size_limit;
/// Fill in the pool_info_t structure for the provided pool.
/// \param[in] pool Pool for which information will be retrieved
/// \param[out] pool_i Pointer to structure where pool info will be stored
@@ -237,10 +237,15 @@ void MemoryTest::MaxSingleAllocationTest(hsa_agent_t ag,
std::min(pool_sz, info.totalram / gran_sz) :
pool_sz;
// Reduce upper_bound by 10% for system-RAM. Otherwise Linux OOM-Killer app can be triggered,
// if system has allocated all available physical memory and swap space, and so killing this
// process.
uint64_t upper_bound = (ag_type == HSA_DEVICE_TYPE_CPU) ? (pool_sz * 0.90) : pool_sz;
// Reduce upper_bound for system-RAM: by 10% when a pool size limit is set
// (rocrtst::pool_size_limit != 0), otherwise by 30%. Without this margin the Linux
// OOM-Killer can be triggered and kill this process once the system has allocated all
// available physical memory and swap space.
float pool_size_limit_ratio = 1.0;
if (ag_type == HSA_DEVICE_TYPE_CPU) {
pool_size_limit_ratio = rocrtst::pool_size_limit ? 0.9 : 0.7;
}
uint64_t upper_bound = pool_size_limit_ratio * pool_sz;
uint64_t lower_bound = 0;
auto max_alloc_size = upper_bound;