diff --git a/projects/rccl-tests/Makefile b/projects/rccl-tests/Makefile index 8e0154aa31..cf64f3db22 100644 --- a/projects/rccl-tests/Makefile +++ b/projects/rccl-tests/Makefile @@ -7,9 +7,9 @@ BUILDDIR ?= build override BUILDDIR := $(abspath $(BUILDDIR)) -.PHONY : all clean +.PHONY: all clean -default : src.build +default: src.build TARGETS=$(filter-out src/hypercube.cu, $(wildcard src/*)) diff --git a/projects/rccl-tests/README.md b/projects/rccl-tests/README.md index 74f15515b4..9630c50fef 100644 --- a/projects/rccl-tests/README.md +++ b/projects/rccl-tests/README.md @@ -46,9 +46,9 @@ Run on 8 GPUs (`-g 8`), scanning from 8 Bytes to 128MBytes : $ ./build/all_reduce_perf -b 8 -e 128M -f 2 -g 8 ``` -Run with MPI on 40 processes (potentially on multiple nodes) with 4 GPUs each : +Run with MPI on 10 processes (potentially on multiple nodes) with 4 GPUs each, for a total of 40 GPUs: ```shell -$ mpirun -np 40 ./build/all_reduce_perf -b 8 -e 128M -f 2 -g 4 +$ mpirun -np 10 ./build/all_reduce_perf -b 8 -e 128M -f 2 -g 4 ``` ### Performance @@ -66,7 +66,7 @@ All tests support the same set of arguments : * `-b,--minbytes ` minimum size to start with. Default : 32M. * `-e,--maxbytes ` maximum size to end at. Default : 32M. * Increments can be either fixed or a multiplication factor. Only one of those should be used - * `-i,--stepbytes ` fixed increment between sizes. Default : (max-min)/10. + * `-i,--stepbytes ` fixed increment between sizes. Default : 1M. * `-f,--stepfactor ` multiplication factor between sizes. Default : disabled. * RCCL operations arguments * `-o,--op ` Specify which reduction operation to perform. Only relevant for reduction operations like Allreduce, Reduce or ReduceScatter. Default : Sum. 
diff --git a/projects/rccl-tests/src/common.cu b/projects/rccl-tests/src/common.cu
index b2fba76961..c71bf00049 100644
--- a/projects/rccl-tests/src/common.cu
+++ b/projects/rccl-tests/src/common.cu
@@ -310,7 +310,7 @@ testResult_t CheckData(struct threadArgs* args, ncclDataType_t type, ncclRedOp_t
 
   *wrongElts = 0;
   for (int i=0; i < args->nGpus; i++) *wrongElts += wrongPerGpu[i];
-  hipFree(wrongPerGpu);
+  hipHostFree(wrongPerGpu);
 
   if (args->reportErrors && *wrongElts) args->errors[0]++;
   return testSuccess;
@@ -1169,6 +1169,8 @@ testResult_t run() {
     errors[t] = bw_count[t] = 0;
   }
 
+  fflush(stdout);
+
   const char* timeStr = report_cputime ? "cputime" : "time";
   PRINT("#\n");
   PRINT("# %10s %12s %8s %6s %6s out-of-place in-place \n", "", "", "", "", "");
diff --git a/projects/rccl-tests/src/common.h b/projects/rccl-tests/src/common.h
index cb3bd3f3c2..9ed929a905 100644
--- a/projects/rccl-tests/src/common.h
+++ b/projects/rccl-tests/src/common.h
@@ -177,15 +177,50 @@ static void getHostName(char* hostname, int maxlen) {
 
 #include <stdint.h>
 
-static uint64_t getHostHash(const char* string) {
-  // Based on DJB2, result = result * 33 + char
+// Hash exactly n bytes of string; the input does not need a terminator.
+static uint64_t getHash(const char* string, size_t n) {
+  // Based on DJB2a, result = result * 33 ^ char
   uint64_t result = 5381;
-  for (int c = 0; string[c] != '\0'; c++){
-    result = ((result << 5) + result) + string[c];
+  for (size_t c = 0; c < n; c++) {
+    result = ((result << 5) + result) ^ string[c];
   }
   return result;
 }
 
+/* Generate a hash of the unique identifying string for this host
+ * that will be unique for both bare-metal and container instances
+ * Equivalent of a hash of:
+ *
+ * $(hostname)$(cat /proc/sys/kernel/random/boot_id)
+ *
+ * hostname must be a NUL-terminated string. If the boot_id file cannot be
+ * opened or read, the hash of the hostname alone is returned. The combined
+ * string is truncated to fit the 1024-byte scratch buffer.
+ */
+#define HOSTID_FILE "/proc/sys/kernel/random/boot_id"
+static uint64_t getHostHash(const char* hostname) {
+  char hostHash[1024];
+
+  // Fall back is the hostname if something fails. snprintf always
+  // NUL-terminates, so the strlen() below stays inside the buffer.
+  (void) snprintf(hostHash, sizeof(hostHash), "%s", hostname);
+  size_t offset = strlen(hostHash);
+
+  FILE *file = fopen(HOSTID_FILE, "r");
+  if (file != NULL) {
+    char *p;
+    if (fscanf(file, "%ms", &p) == 1) {
+      // Append the boot id; offset <= sizeof(hostHash)-1 so the size is >= 1
+      (void) snprintf(hostHash+offset, sizeof(hostHash)-offset, "%s", p);
+      free(p);
+    }
+    // Close only a stream that was opened: fclose(NULL) is undefined behavior
+    fclose(file);
+  }
+
+  return getHash(hostHash, strlen(hostHash));
+}
+
 static size_t wordSize(ncclDataType_t type) {
   switch(type) {
     case ncclChar: