From 07bb6fce8f63e945b1baf60aeec50c33553f06fb Mon Sep 17 00:00:00 2001 From: Wenkai Du Date: Mon, 11 Nov 2019 14:08:31 -0800 Subject: [PATCH] rccl_prim_test: Generalize ring topology and duplications Allow a user-specified ring topology from the command line, duplicated to the requested number of workgroups: ./rccl_prim_test -w 12 -p copy -r "0 1 2 3|3 2 1 0|0 2 1 3|3 1 2 0|0 2 3 1|1 3 2 0" --- tools/rccl-prim-test/rccl_prim_test.cpp | 52 ++++++++++++++++++------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/tools/rccl-prim-test/rccl_prim_test.cpp b/tools/rccl-prim-test/rccl_prim_test.cpp index 23db2c8285..49d98394b9 100644 --- a/tools/rccl-prim-test/rccl_prim_test.cpp +++ b/tools/rccl-prim-test/rccl_prim_test.cpp @@ -243,11 +243,9 @@ static void setupRings(uint32_t *info, int *ring_0, int *ring_1) { printf("\n"); } findConnect(info, ring_0, deviceCnt); - printRing(0, ring_0, deviceCnt); ring_1[0] =0; for (int i = 1; i < deviceCnt; i++) ring_1[i] = ring_0[deviceCnt-i]; - printRing(1, ring_1, deviceCnt); } char* getCmdOption(char ** begin, char ** end, const std::string & option) { @@ -270,7 +268,7 @@ static const char* link_type_name[] = {"HT", "QPI", "PCIE", "IB", "XGMI"}; int main(int argc,char* argv[]) { if (cmdOptionExists(argv, argv + argc, "-h")) { - printf("./rccl_prim_test -w num_workgroups -p copy|localcopy|doublecopy|reduce|reducecopy|all -i iterations -n bytes -s 0|1\n"); + printf("./rccl_prim_test -w num_workgroups -p copy|localcopy|doublecopy|reduce|reducecopy|all -i iterations -n bytes -s 0|1 -r \"0 1 2 3|3 2 1 0\"\n"); exit(0); } @@ -299,6 +297,9 @@ int main(int argc,char* argv[]) sync = atol(s); if (sync) printf("Sync all GPUs before operation\n"); + char *r = getCmdOption(argv, argv + argc, "-r"); + if (r) printf("User specified ring topology: %s\n", r); + const char *ops[] = {"copy", "localcopy", "doublecopy", "reduce", "reducecopy", "read", "all"}; char *prim = getCmdOption(argv, argv + argc, "-p"); int op = NUM_OPS, begin_op, end_op; 
@@ -320,9 +321,34 @@ int main(int argc,char* argv[]) // Enable peer access setupPeers(connection_info); // clockwise and counter clockwise rings - int ring_0[MAX_GPU] = {-1, -1, -1, -1,-1, -1, -1, -1}; - int ring_1[MAX_GPU] = {-1, -1, -1, -1,-1, -1, -1, -1}; - setupRings(connection_info, ring_0, ring_1); + int ring[MAX_WORKGROUPS][MAX_GPU]; + for (int i = 0; i < MAX_WORKGROUPS; i++) + for (int j = 0; j Next GPU %d\n", i, j, next_gpu); h_transfer_data[i].dest0[j] = buff[next_gpu*MAX_WORKGROUPS+j] + N; h_transfer_data[i].dest1[j] = buff_coarse[i*MAX_WORKGROUPS+j] + N; @@ -467,10 +494,7 @@ int main(int argc,char* argv[]) HIPCHECK(hipGetDeviceProperties(&prop, i)); for (int j = 0; j < workgroups; j++) { int next_gpu; - if (j%2) - next_gpu = findNextGpu(ring_1, i, nGpu); - else - next_gpu = findNextGpu(ring_0, i, nGpu); + next_gpu = findNextGpu(ring[j], i, nGpu); uint32_t linktype; uint32_t hopcount;