From dafb70408d761454eeaf82f58b46d78a90eb3020 Mon Sep 17 00:00:00 2001 From: Martin Belanger Date: Tue, 3 Jun 2025 11:43:02 -0400 Subject: [PATCH 1/3] Print the name of the program being executed One thing missing from the stdout of each performance test is the name of the test that is actually being run. This patch adds 2 new messages to the stdout. At the beginning of the execution of a test (e.g. sendrecv_perf) we will now see this message: Collective test starting: sendrecv_perf And at the end, we will now see this: Collective test concluded: sendrecv_perf This is needed when running several tests consecutively and we're trying to parse the stdout to collect the results. For example, using a Python script to parse the stdout, one could retrieve the results for each test and plot them on a graph. This patch makes it easier to implement such a script. Signed-off-by: Martin Belanger --- src/common.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/common.cu b/src/common.cu index 3987d89081..a4a92d51fe 100644 --- a/src/common.cu +++ b/src/common.cu @@ -13,6 +13,7 @@ #include #include #include "cuda.h" +#include <errno.h> /* program_invocation_short_name */ #include "../verifiable/verifiable.h" @@ -1006,6 +1007,7 @@ testResult_t run() { #endif is_main_thread = is_main_proc = (proc == 0) ? 1 : 0; + PRINT("# Collective test starting: %s\n", program_invocation_short_name); PRINT("# nThread %d nGpus %d minBytes %ld maxBytes %ld step: %ld(%s) warmup iters: %d iters: %d agg iters: %d validation: %d graph: %d\n", nThreads, nGpus, minBytes, maxBytes, (stepFactor > 1)?stepFactor:stepBytes, (stepFactor > 1)?"factor":"bytes", @@ -1257,6 +1259,7 @@ testResult_t run() { PRINT("# Out of bounds values : %d %s\n", errors[0], errors[0] ? "FAILED" : "OK"); PRINT("# Avg bus bandwidth : %g %s\n", bw[0], check_avg_bw == -1 ? "" : (bw[0] < check_avg_bw*(0.9) ? 
"FAILED" : "OK")); PRINT("#\n"); + PRINT("# Collective test concluded: %s\n", program_invocation_short_name); #ifdef MPI_SUPPORT MPI_Comm_free(&mpi_comm); MPI_Finalize(); From def2d3689c4dc2390d903ecea1cb1d9314134661 Mon Sep 17 00:00:00 2001 From: David Addison Date: Wed, 23 Jul 2025 16:04:30 -0700 Subject: [PATCH 2/3] Minor fix to Makefile Move comments to separate lines --- src/Makefile | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Makefile b/src/Makefile index 612395f645..b097765ccd 100644 --- a/src/Makefile +++ b/src/Makefile @@ -5,9 +5,12 @@ # include common.mk -MPI ?= 0 # Set to 1 to enable MPI support (multi-process/multi-node) -NAME_SUFFIX ?= # e.g. _mpi when using MPI=1 -DSO ?= 0 # Set to 1 to create and use libverifiable.so to reduce binary size +# Set to 1 to enable MPI support (multi-process/multi-node) +MPI ?= 0 +# e.g. Set to _mpi when using MPI=1 +NAME_SUFFIX ?= +# Set to 1 to create and use libverifiable.so to reduce binary size +DSO ?= 0 .PHONY: build clean From 6edafa0a9ca5964e2236afea0951a0f2d7df23cd Mon Sep 17 00:00:00 2001 From: David Addison Date: Wed, 21 May 2025 09:40:26 -0700 Subject: [PATCH 3/3] Add extra reserved space during maxBytes calculation Also, don't allow minBytes > maxBytes --- src/common.cu | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/common.cu b/src/common.cu index 69b892ac2b..b7c3e0c0a8 100644 --- a/src/common.cu +++ b/src/common.cu @@ -16,6 +16,9 @@ #include "../verifiable/verifiable.h" +#define DIVUP(x, y) \ + (((x)+(y)-1)/(y)) + int test_ncclVersion = 0; // init'd with ncclGetVersion() #if NCCL_MAJOR >= 2 @@ -1047,10 +1050,14 @@ testResult_t run() { PRINT("%s", line); #endif + // Reserve 1GiB of memory for each 16GiB installed, but limit to a max of 4GiB + const size_t GB = (1ULL << 30); + size_t reserveMem = std::min(DIVUP(maxMem, 16*GB) * 1*GB, 4*GB); // We need sendbuff, recvbuff, expected (when datacheck enabled), plus 1G for the rest. 
- size_t memMaxBytes = (maxMem - (1<<30)) / (datacheck ? 3 : 2); + size_t memMaxBytes = (maxMem - reserveMem - 1*GB) / (datacheck ? 3 : 2); if (maxBytes > memMaxBytes) { maxBytes = memMaxBytes; + if (minBytes > maxBytes) minBytes = maxBytes; if (proc == 0) printf("#\n# Reducing maxBytes to %ld due to memory limitation\n", maxBytes); }