Merge remote-tracking branch 'nccl/master' into develop
[ROCm/rccl-tests commit: bb0f15d407]
This commit is contained in:
@@ -7,9 +7,9 @@
|
||||
BUILDDIR ?= build
|
||||
override BUILDDIR := $(abspath $(BUILDDIR))
|
||||
|
||||
.PHONY : all clean
|
||||
.PHONY: all clean
|
||||
|
||||
default : src.build
|
||||
default: src.build
|
||||
|
||||
TARGETS=$(filter-out src/hypercube.cu, $(wildcard src/*))
|
||||
|
||||
|
||||
@@ -46,9 +46,9 @@ Run on 8 GPUs (`-g 8`), scanning from 8 Bytes to 128MBytes :
|
||||
$ ./build/all_reduce_perf -b 8 -e 128M -f 2 -g 8
|
||||
```
|
||||
|
||||
Run with MPI on 40 processes (potentially on multiple nodes) with 4 GPUs each :
|
||||
Run with MPI on 10 processes (potentially on multiple nodes) with 4 GPUs each, for a total of 40 GPUs:
|
||||
```shell
|
||||
$ mpirun -np 40 ./build/all_reduce_perf -b 8 -e 128M -f 2 -g 4
|
||||
$ mpirun -np 10 ./build/all_reduce_perf -b 8 -e 128M -f 2 -g 4
|
||||
```
|
||||
|
||||
### Performance
|
||||
@@ -66,7 +66,7 @@ All tests support the same set of arguments :
|
||||
* `-b,--minbytes <min size in bytes>` minimum size to start with. Default : 32M.
|
||||
* `-e,--maxbytes <max size in bytes>` maximum size to end at. Default : 32M.
|
||||
* Increments can be either fixed or a multiplication factor. Only one of those should be used.
|
||||
* `-i,--stepbytes <increment size>` fixed increment between sizes. Default : (max-min)/10.
|
||||
* `-i,--stepbytes <increment size>` fixed increment between sizes. Default : 1M.
|
||||
* `-f,--stepfactor <increment factor>` multiplication factor between sizes. Default : disabled.
|
||||
* RCCL operations arguments
|
||||
* `-o,--op <sum/prod/min/max/avg/all>` Specify which reduction operation to perform. Only relevant for reduction operations like Allreduce, Reduce or ReduceScatter. Default : Sum.
|
||||
|
||||
@@ -310,7 +310,7 @@ testResult_t CheckData(struct threadArgs* args, ncclDataType_t type, ncclRedOp_t
|
||||
|
||||
*wrongElts = 0;
|
||||
for (int i=0; i < args->nGpus; i++) *wrongElts += wrongPerGpu[i];
|
||||
hipFree(wrongPerGpu);
|
||||
hipHostFree(wrongPerGpu);
|
||||
|
||||
if (args->reportErrors && *wrongElts) args->errors[0]++;
|
||||
return testSuccess;
|
||||
@@ -1169,6 +1169,8 @@ testResult_t run() {
|
||||
errors[t] = bw_count[t] = 0;
|
||||
}
|
||||
|
||||
fflush(stdout);
|
||||
|
||||
const char* timeStr = report_cputime ? "cputime" : "time";
|
||||
PRINT("#\n");
|
||||
PRINT("# %10s %12s %8s %6s %6s out-of-place in-place \n", "", "", "", "", "");
|
||||
|
||||
@@ -177,15 +177,46 @@ static void getHostName(char* hostname, int maxlen) {
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
static uint64_t getHostHash(const char* string) {
|
||||
// Based on DJB2, result = result * 33 + char
|
||||
static uint64_t getHash(const char* string, size_t n) {
|
||||
// Based on DJB2a, result = result * 33 ^ char
|
||||
uint64_t result = 5381;
|
||||
for (int c = 0; string[c] != '\0'; c++){
|
||||
result = ((result << 5) + result) + string[c];
|
||||
for (size_t c = 0; c < n; c++) {
|
||||
result = ((result << 5) + result) ^ string[c];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Generate a hash of the unique identifying string for this host
 * that will be unique for both bare-metal and container instances.
 * Equivalent to a hash of:
 *
 * $(hostname)$(cat /proc/sys/kernel/random/boot_id)
 *
 * hostname: NUL-terminated host name; it is truncated to fit the internal
 *           1024-byte buffer if longer.
 * Returns the getHash() value of the host name concatenated with the first
 * whitespace-delimited token read from HOSTID_FILE. If that file cannot be
 * opened or read, only the host name is hashed.
 */
#define HOSTID_FILE "/proc/sys/kernel/random/boot_id"
static uint64_t getHostHash(const char* hostname) {
  char hostHash[1024];

  // Fallback is the hostname alone if anything below fails.
  // snprintf always NUL-terminates, so the strlen() that follows is safe even
  // when hostname is longer than the buffer (strncpy would leave the buffer
  // unterminated in that case).
  (void) snprintf(hostHash, sizeof(hostHash), "%s", hostname);
  size_t offset = strlen(hostHash);

  FILE *file = fopen(HOSTID_FILE, "r");
  if (file != NULL) {
    char *p = NULL;
    // %ms (POSIX.1-2008) makes fscanf allocate the token; we own and free it.
    if (fscanf(file, "%ms", &p) == 1) {
      // Append as much of the boot id as fits; the result stays terminated.
      (void) snprintf(hostHash+offset, sizeof(hostHash)-offset, "%s", p);
      free(p);
    }
    // Close only a stream that was actually opened: fclose(NULL) is undefined.
    fclose(file);
  }

  return getHash(hostHash, strlen(hostHash));
}
|
||||
|
||||
static size_t wordSize(ncclDataType_t type) {
|
||||
switch(type) {
|
||||
case ncclChar:
|
||||
|
||||
مرجع در شماره جدید
Block a user