From 146ecc221282e54ceae899d9c7a88f1176499fe2 Mon Sep 17 00:00:00 2001
From: David Addison
Date: Wed, 21 May 2025 09:40:26 -0700
Subject: [PATCH] Add extra reserved space during maxBytes calculation

Also, don't allow minBytes > maxBytes

[ROCm/rccl-tests commit: 6edafa0a9ca5964e2236afea0951a0f2d7df23cd]
---
 projects/rccl-tests/src/common.cu | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/projects/rccl-tests/src/common.cu b/projects/rccl-tests/src/common.cu
index 69b892ac2b..b7c3e0c0a8 100644
--- a/projects/rccl-tests/src/common.cu
+++ b/projects/rccl-tests/src/common.cu
@@ -16,6 +16,9 @@
 
 #include "../verifiable/verifiable.h"
 
+#define DIVUP(x, y) \
+    (((x)+(y)-1)/(y))
+
 int test_ncclVersion = 0; // init'd with ncclGetVersion()
 
 #if NCCL_MAJOR >= 2
@@ -1047,10 +1050,14 @@ testResult_t run() {
   PRINT("%s", line);
 #endif
 
+  // Reserve 1GiB of memory for each 16GiB installed, but limit to a max of 4GiB
+  const size_t GB = (1ULL << 30);
+  size_t reserveMem = std::min(DIVUP(maxMem, 16*GB) * 1*GB, 4*GB);
   // We need sendbuff, recvbuff, expected (when datacheck enabled), plus 1G for the rest.
-  size_t memMaxBytes = (maxMem - (1<<30)) / (datacheck ? 3 : 2);
+  size_t memMaxBytes = (maxMem - reserveMem - 1*GB) / (datacheck ? 3 : 2);
   if (maxBytes > memMaxBytes) {
     maxBytes = memMaxBytes;
+    if (minBytes > maxBytes) minBytes = maxBytes;
     if (proc == 0) printf("#\n# Reducing maxBytes to %ld due to memory limitation\n", maxBytes);
   }
 