From 6f7eb653082953ff0a9fe54b499441e76da5d060 Mon Sep 17 00:00:00 2001 From: akolliasAMD <99202231+akolliasAMD@users.noreply.github.com> Date: Fri, 15 Sep 2023 15:01:33 -0600 Subject: [PATCH] changed the form that RCCL_TREE uses (#888) * changed the form that RCCL_TREE uses [ROCm/rccl commit: b85d73c02e39edeb2752f00173d8511a5e7bad14] --- projects/rccl/src/graph/connect.cc | 131 +++++++++++++++++++------ projects/rccl/src/graph/rome_models.cc | 70 ++++++++----- projects/rccl/src/graph/search.cc | 2 +- projects/rccl/src/include/graph.h | 2 +- 4 files changed, 145 insertions(+), 60 deletions(-) diff --git a/projects/rccl/src/graph/connect.cc b/projects/rccl/src/graph/connect.cc index b9d9dc57e0..8d0ff07eba 100644 --- a/projects/rccl/src/graph/connect.cc +++ b/projects/rccl/src/graph/connect.cc @@ -73,53 +73,122 @@ ncclResult_t ncclTopoPreset(struct ncclComm* comm, struct ncclTopoGraph** graphs return ncclSuccess; } +bool isRankHere(const char* s, int start, int end, int rank) { + if (end <= start || start < 0 || end < 0) + return false; + int num = 0; + while (start < end) { + char currChar = s[start]; + if (isdigit(currChar)) { + num = num * 10 + (currChar - '0'); + if (isdigit(s[start+1])) { + start++; + continue; + } + } + else if (currChar == '(' || currChar == ')') { + start++; + num = 0; + continue; + } + if (num == rank) return true; + start++; + } + return false; +} + ncclResult_t ncclTreeBasePostset(struct ncclComm* comm, struct ncclTopoGraph* treeGraph) { int x=0, y=0; - for (int i=0; treeGraph->treeBase[i][0]!=-1; i++) + for (int i=0; treeGraph->treeBase[i][0]!=0; i++) { x=i+1; } - for (int i=0; treeGraph->treeBase[0][i]!=-1; i++) - { - y=i+1; - } - if( treeGraph->treeBase[0][0] == -1) return ncclSuccess; + if( treeGraph->treeBase[0][0] == 0) return ncclSuccess; int nChannels = comm->nChannels; int localRanks = comm->topo->nodes[GPU].count; //new tree - for (int c=0; ctreeBase[buff][(ko+(localRanks-1)/2)%localRanks]; + char tempString[NCCL_TOPO_MAX_NODES*4]; + int ko=0; + while (treeGraph->treeBase[buff][ko] != 0) { + tempString[ko] = treeGraph->treeBase[buff][ko]; + ko++; } - - - struct ncclChannel* channel = comm->channels+c; - + tempString[ko]=0; + int start = 0; int curRank = comm->rank; - int arrayIndex; - - for (int i=0; itree.up = -1; - channel->tree.down[0] = tempArray[i+1]; - channel->tree.down[1] = tempArray[localRanks-1]; - channel->tree.down[2] = -1; - } - else { - channel->tree.up = i > localRanks/2 ? tempArray[(i+1)%localRanks] : tempArray[i-1]; - channel->tree.down[0] = i > localRanks/2 ? tempArray[i-1] : tempArray[i+1]; - if ((i == localRanks/2) || (i == (localRanks/2 + 1))) { - channel->tree.down[0] = -1; + struct ncclChannel* channel = comm->channels+c; + int end = 0; + while (tempString[end] != 0) end++; + int parent = -1; + // constructing a number from the continuous digits + while (start < end) { + int num = 0, num_found = 0; + start++; + while (start < end && tempString[start] != '(' + && tempString[start] != ')') { + int num_here = (int)(tempString[start] - '0'); + num = num * 10 + num_here; + start = start + 1; + if (tempString[start] == '(' || tempString[start] == ')' || start == end) num_found = 1; + } + if (num_found != 0 && num == curRank) { + channel->tree.up = parent; + int depth = 0; + for (int childId = 0; childId < NCCL_MAX_TREE_ARITY; childId++) { + int or_start = start; + int child = -1; + channel->tree.down[childId] = -1; + if (or_start >= end -1) continue; + num=0; + or_start++; + while (tempString[or_start] != 0 && tempString[or_start] != '(' + && tempString[or_start] != ')') { + int num_here = (int)(tempString[or_start] - '0'); + num = num * 10 + num_here; + or_start++; + } + child = num; + // find next child start + while (start < end) { + if (tempString[start] == '(' ) depth++; + else if(tempString[start] == ')') depth--; + if (depth == 0) break; // next child + start++; + } + start++; + channel->tree.down[childId] = child; + // get kids, update numbers, get out of this string + } + break; + } + else { //go to the next one + parent = num; + int start_c = start; + int end_c = start_c; + while (end_c < end) { + int depth = 0; + while (end_c < end) { + if (tempString[end_c] == '(' ) depth++; + else if(tempString[end_c] == ')') depth--; + if (depth == 0) break; // next child + end_c++; + } + if (isRankHere(tempString, start_c, end_c, curRank)) { + start = start_c; + end = end_c; + break; + } + else { + end_c++; + start_c = end_c; } - channel->tree.down[1] = -1; - channel->tree.down[2] = -1; } } } + } return ncclSuccess; } diff --git a/projects/rccl/src/graph/rome_models.cc b/projects/rccl/src/graph/rome_models.cc index c254e47ab8..3afc58ce55 100644 --- a/projects/rccl/src/graph/rome_models.cc +++ b/projects/rccl/src/graph/rome_models.cc @@ -343,7 +343,7 @@ static struct rcclRomeModel rome_model_43 = { .pattern = "20202020", .ringBase = "0 1 2 3 4 5 6 7|0 2 5 7 4 6 1 3|0 3 1 6 4 7 5 2|0 7 6 5 4 3 2 1|0 1 2 3 4 5 6 7|0 2 5 7 4 6 1 3|0 3 1 6 4 7 5 2|0 7 6 5 4 3 2 1|0 1 2 3 4 5 6 7|0 2 5 7 4 6 1 3|0 3 1 6 4 7 5 2|0 7 6 5 4 3 2 1", .options = "treeDefined=1", - .treeBase = "1 0 3 2 5 6 7 4|3 1 0 2 5 7 6 4|0 3 1 2 5 7 4 6|5 4 7 6 1 0 2 3|7 5 4 6 1 2 0 3|4 7 5 6 1 0 3 2|0 3 2 1 6 7 5 4|0 2 3 1 6 4 7 5|2 0 3 1 6 5 4 7|7 6 4 5 2 3 1 0|7 4 6 5 2 0 3 1|6 7 4 5 2 1 0 3", + .treeBase = "(2(5(6(7(4))))(3(0(1))))|(2(5(7(6(4))))(0(1(3))))|(2(5(7(4(6))))(1(3(0))))|(6(1(0(2(3))))(7(4(5))))|(6(1(2(0(3))))(4(5(7))))|(6(1(0(3(2))))(5(7(4))))|(1(6(7(5(4))))(2(3(0))))|(1(6(4(7(5))))(3(2(0))))|(1(6(5(4(7))))(3(0(2))))|(5(2(3(1(0))))(4(6(7))))|(5(2(0(3(1))))(6(4(7))))|(5(2(1(0(3))))(4(7(6))))", }; static struct rcclRomeModel rome_model_55 = { @@ -370,7 +370,7 @@ static struct rcclRomeModel rome_model_56 = { .pattern = "40404040", .ringBase = "0 1 3 2 6 7 15 14 10 11 9 8 12 13 5 4|0 1 2 3 7 6 13 12 8 9 10 11 15 14 5 4|0 2 3 7 6 14 15 11 10 8 9 13 12 4 5 1|4 5 13 12 8 9 11 10 14 15 7 6 2 3 1 0|4 5 14 15 11 10 9 8 12 13 6 7 3 2 1 0|1 5 4 12 13 9 8 10 11 15 14 6 7 3 2 0", .options = "pivotA2AEnabled=1,pivotA2ANumBiRings=3,tuning=1,mscclEnabled=1,treeDefined=1", - .treeBase= "11 9 8 12 13 5 4 0 1 3 2 6 7 15 14 10|11 15 14 5 4 0 1 2 3 7 6 13 12 8 9 10|5 1 0 2 3 7 6 14 15 11 10 8 9 13 12 4|1 3 2 6 7 15 14 10 11 9 8 12 13 5 4 0|3 7 6 13 12 8 9 10 11 15 14 5 4 0 1 2|15 11 10 8 9 13 12 4 5 1 0 2 3 7 6 14|13 5 4 0 1 3 2 6 7 15 14 10 11 9 8 12|4 0 1 2 3 7 6 13 12 8 9 10 11 15 14 5|3 7 6 14 15 11 10 8 9 13 12 4 5 1 0 2|7 15 14 10 11 9 8 12 13 5 4 0 1 3 2 6|12 8 9 10 11 15 14 5 4 0 1 2 3 7 6 13|9 13 12 4 5 1 0 2 3 7 6 14 15 11 10 8", + .treeBase= "(0(1(3(2(6(7(15(14(10))))))))(4(5(13(12(8(9(11))))))))|(2(3(7(6(13(12(8(9(10))))))))(1(0(4(5(14(15(11))))))))|(14(15(11(10(8(9(13(12(4))))))))(6(7(3(2(0(1(5))))))))|(10(11(9(8(12(13(5(4(0))))))))(14(15(7(6(2(3(1))))))))|(10(11(15(14(5(4(0(1(2))))))))(9(8(12(13(6(7(3))))))))|(4(5(1(0(2(3(7(6(14))))))))(12(13(9(8(10(11(15))))))))|(6(7(15(14(10(11(9(8(12))))))))(2(3(1(0(4(5(13))))))))|(13(12(8(9(10(11(15(14(5))))))))(6(7(3(2(1(0(4))))))))|(8(9(13(12(4(5(1(0(2))))))))(10(11(15(14(6(7(3))))))))|(12(13(5(4(0(1(3(2(6))))))))(8(9(11(10(14(15(7))))))))|(5(4(0(1(2(3(7(6(13))))))))(14(15(11(10(9(8(12))))))))|(2(3(7(6(14(15(11(10(8))))))))(0(1(5(4(12(13(9))))))))", }; static struct rcclRomeModel rome_model_58 = { @@ -763,23 +763,22 @@ end: /* Parse user defined treeBase for complicated trees. Format is like : - * "10 11|14 15|6 7|2 3|0 1|4 5|12 13|8 9" + * "(4(2(3)(1))(6(5)))" * * Rings with a non-matching number of gpus are ignored so we can provide * rings for multiple cases. */ ncclResult_t parseGraphLight(const char* str, struct ncclTopoSystem* system, struct ncclTopoGraph* graph, int* gpu_map) { - int gpus[NCCL_TOPO_MAX_NODES]; + int gpus[NCCL_TOPO_MAX_NODES]; //transcribe/change according to gpu_map int nChannels = 0; int gpu = 0; int offset = 0; + int start_offset = offset; if (str[0] == 0) { - graph->treeBase[0][0] = -1; + graph->treeBase[0][0] = 0; return ncclSuccess; } - int status = 0; // 0 : between numbers, 1 : inside number, 2: start NET, 3: inside NET - int nets[NCCL_TOPO_MAX_NODES*2]; - int net_offset = 0, net_count = 0; + int status = 0; // 0 : between numbers, 1 : inside number int ngpus = system->nodes[GPU].count; int x=0, y=0; do { @@ -800,32 +799,49 @@ ncclResult_t parseGraphLight(const char* str, struct ncclTopoSystem* system, str } status = 0; if (str[offset] == '|' || str[offset] == 0) { - for (int r=0; rnodes[GPU].nodes[j].gpu.dev) - break; - if (j < ngpus) - { - graph->treeBase[x][r] = system->nodes[GPU].nodes[j].gpu.rank; - y=r; + int r = 0, y = 0; + while(start_offset < offset) { + // for (int r=0; rtreeBase[x][y] = str[start_offset]; + y++; + start_offset++; } - else - return ncclInternalError; + else { + int g = gpus[r]; + // remap if needed + if (gpu_map) g = gpu_map[g]; + r++; + int j = 0; + // Translate gpu numbers into ranks + for (j = 0; j < ngpus; j++) + if (g == system->nodes[GPU].nodes[j].gpu.dev) + break; + if (j < ngpus) + { + while (str[start_offset] != '(' && str[start_offset] != ')') start_offset++; + char number_str[10]; + sprintf(number_str, "%d", g); + int k=0; + while (number_str[k] != 0) { + graph->treeBase[x][y]=number_str[k]; + y++; + k++; + } + } + else + return ncclInternalError; + } + } - y++; - graph->treeBase[x][y] = -1; + graph->treeBase[x][y] = 0; x++; gpu=0; + start_offset = offset + 1; } } } while (str[offset++] != 0); - graph->treeBase[x][0] = -1; - + graph->treeBase[x][0] = 0; return ncclSuccess; } diff --git a/projects/rccl/src/graph/search.cc b/projects/rccl/src/graph/search.cc index dcdee9df1a..09841beba1 100644 --- a/projects/rccl/src/graph/search.cc +++ b/projects/rccl/src/graph/search.cc @@ -887,7 +887,7 @@ ncclResult_t ncclTopoCompute(ncclTopoSystem* system, struct ncclTopoGraph* graph } str = getenv("NCCL_RINGS"); - char* strTrees = getenv("NCCL_TREES"); + char* strTrees = getenv("RCCL_TREES"); if (str || strTrees) { // user supplied topo diff --git a/projects/rccl/src/include/graph.h b/projects/rccl/src/include/graph.h index 69726e08de..66a85c7ce3 100644 --- a/projects/rccl/src/include/graph.h +++ b/projects/rccl/src/include/graph.h @@ -96,7 +96,7 @@ struct ncclTopoGraph { int inter[MAXCHANNELS*2]; int nIntraChannels; int intraNets[MAXCHANNELS*NCCL_TOPO_MAX_NODES*2]; - int treeBase[NCCL_TOPO_MAX_NODES][NCCL_TOPO_MAX_NODES]; + char treeBase[NCCL_TOPO_MAX_NODES][NCCL_TOPO_MAX_NODES*4]; }; ncclResult_t ncclTopoCompute(struct ncclTopoSystem* system, struct ncclTopoGraph* graph);