Merge pull request #633 from edgargabriel/topic/topo-binary-tree

introduce a hw topology aware bintree

[ROCm/rccl commit: 4972c129e3]
This commit is contained in:
Edgar Gabriel
2022-10-05 17:06:54 -05:00
committed by GitHub
4 changed files with 204 additions and 2 deletions
+1 -1
View File
@@ -1224,7 +1224,7 @@ comp_next:
NCCLCHECK(getPatternInfo(info));
NCCLCHECK(getLoopInfo(info));
if (info->comm->topo->pivotA2ANumBiRings == 3 ) {
if (ncclTypeSize(info->datatype)*info->count > 65536) {
if (ncclTypeSize(info->datatype)*info->count > 131072) {
work->pad_0 = 1;
} else {
work->pad_0 = 2;
+197
View File
@@ -163,6 +163,203 @@ ncclResult_t ncclBinaryTreePostset(struct ncclComm* comm,
return ncclSuccess;
}
#define NUM_HAYABUSA_TREES 2
static bool hayabusa_tree_matrix_is_init=false;
static int hayabusa_tree_matrix[NUM_HAYABUSA_TREES][16][4];
static void hayabusa_tree_matrix_init()
{
if (hayabusa_tree_matrix_is_init)
return;
// index = rank of proc, child0, child1, child2, parent
// channel 0: root is 15
hayabusa_tree_matrix[0][0][0] = 1;
hayabusa_tree_matrix[0][0][1] = -1;
hayabusa_tree_matrix[0][0][2] = -1;
hayabusa_tree_matrix[0][0][3] = 4;
hayabusa_tree_matrix[0][1][0] = -1;
hayabusa_tree_matrix[0][1][1] = -1;
hayabusa_tree_matrix[0][1][2] = -1;
hayabusa_tree_matrix[0][1][3] = 0;
hayabusa_tree_matrix[0][2][0] = 3;
hayabusa_tree_matrix[0][2][1] = -1;
hayabusa_tree_matrix[0][2][2] = -1;
hayabusa_tree_matrix[0][2][3] = 6;
hayabusa_tree_matrix[0][3][0] = -1;
hayabusa_tree_matrix[0][3][1] = -1;
hayabusa_tree_matrix[0][3][2] = -1;
hayabusa_tree_matrix[0][3][3] = 2;
hayabusa_tree_matrix[0][4][0] = 0;
hayabusa_tree_matrix[0][4][1] = -1;
hayabusa_tree_matrix[0][4][2] = -1;
hayabusa_tree_matrix[0][4][3] = 5;
hayabusa_tree_matrix[0][5][0] = 4;
hayabusa_tree_matrix[0][5][1] = -1;
hayabusa_tree_matrix[0][5][2] = -1;
hayabusa_tree_matrix[0][5][3] = 14;
hayabusa_tree_matrix[0][6][0] = 2;
hayabusa_tree_matrix[0][6][1] = 7;
hayabusa_tree_matrix[0][6][2] = -1;
hayabusa_tree_matrix[0][6][3] = 14;
hayabusa_tree_matrix[0][7][0] = -1;
hayabusa_tree_matrix[0][7][1] = -1;
hayabusa_tree_matrix[0][7][2] = -1;
hayabusa_tree_matrix[0][7][3] = 6;
hayabusa_tree_matrix[0][8][0] = -1;
hayabusa_tree_matrix[0][8][1] = -1;
hayabusa_tree_matrix[0][8][2] = -1;
hayabusa_tree_matrix[0][8][3] = 9;
hayabusa_tree_matrix[0][9][0] = 13;
hayabusa_tree_matrix[0][9][1] = 8;
hayabusa_tree_matrix[0][9][2] = -1;
hayabusa_tree_matrix[0][9][3] = 11;
hayabusa_tree_matrix[0][10][0] = -1;
hayabusa_tree_matrix[0][10][1] = -1;
hayabusa_tree_matrix[0][10][2] = -1;
hayabusa_tree_matrix[0][10][3] = 11;
hayabusa_tree_matrix[0][11][0] = 9;
hayabusa_tree_matrix[0][11][1] = 10;
hayabusa_tree_matrix[0][11][2] = -1;
hayabusa_tree_matrix[0][11][3] = 15;
hayabusa_tree_matrix[0][12][0] = -1;
hayabusa_tree_matrix[0][12][1] = -1;
hayabusa_tree_matrix[0][12][2] = -1;
hayabusa_tree_matrix[0][12][3] = 13;
hayabusa_tree_matrix[0][13][0] = 12;
hayabusa_tree_matrix[0][13][1] = -1;
hayabusa_tree_matrix[0][13][2] = -1;
hayabusa_tree_matrix[0][13][3] = 9;
hayabusa_tree_matrix[0][14][0] = 5;
hayabusa_tree_matrix[0][14][1] = 6;
hayabusa_tree_matrix[0][14][2] = -1;
hayabusa_tree_matrix[0][14][3] = 15;
hayabusa_tree_matrix[0][15][0] = 14;
hayabusa_tree_matrix[0][15][1] = 11;
hayabusa_tree_matrix[0][15][2] = -1;
hayabusa_tree_matrix[0][15][3] = -1;
//Channel 1: root is 6
hayabusa_tree_matrix[1][0][0] = -1;
hayabusa_tree_matrix[1][0][1] = -1;
hayabusa_tree_matrix[1][0][2] = -1;
hayabusa_tree_matrix[1][0][3] = 1;
hayabusa_tree_matrix[1][1][0] = 5;
hayabusa_tree_matrix[1][1][1] = 0;
hayabusa_tree_matrix[1][1][2] = -1;
hayabusa_tree_matrix[1][1][3] = 3;
hayabusa_tree_matrix[1][2][0] = -1;
hayabusa_tree_matrix[1][2][1] = -1;
hayabusa_tree_matrix[1][2][2] = -1;
hayabusa_tree_matrix[1][2][3] = 3;
hayabusa_tree_matrix[1][3][0] = 1;
hayabusa_tree_matrix[1][3][1] = 2;
hayabusa_tree_matrix[1][3][2] = -1;
hayabusa_tree_matrix[1][3][3] = 7;
hayabusa_tree_matrix[1][4][0] = -1;
hayabusa_tree_matrix[1][4][1] = -1;
hayabusa_tree_matrix[1][4][2] = -1;
hayabusa_tree_matrix[1][4][3] = 5;
hayabusa_tree_matrix[1][5][0] = 4;
hayabusa_tree_matrix[1][5][1] = -1;
hayabusa_tree_matrix[1][5][2] = -1;
hayabusa_tree_matrix[1][5][3] = 1;
hayabusa_tree_matrix[1][6][0] = 7;
hayabusa_tree_matrix[1][6][1] = 13;
hayabusa_tree_matrix[1][6][2] = -1;
hayabusa_tree_matrix[1][6][3] = -1;
hayabusa_tree_matrix[1][7][0] = 3;
hayabusa_tree_matrix[1][7][1] = 15;
hayabusa_tree_matrix[1][7][2] = -1;
hayabusa_tree_matrix[1][7][3] = 6;
hayabusa_tree_matrix[1][8][0] = 9;
hayabusa_tree_matrix[1][8][1] = -1;
hayabusa_tree_matrix[1][8][2] = -1;
hayabusa_tree_matrix[1][8][3] = 12;
hayabusa_tree_matrix[1][9][0] = -1;
hayabusa_tree_matrix[1][9][1] = -1;
hayabusa_tree_matrix[1][9][2] = -1;
hayabusa_tree_matrix[1][9][3] = 8;
hayabusa_tree_matrix[1][10][0] = -1;
hayabusa_tree_matrix[1][10][1] = -1;
hayabusa_tree_matrix[1][10][2] = -1;
hayabusa_tree_matrix[1][10][3] = 11;
hayabusa_tree_matrix[1][11][0] = 10;
hayabusa_tree_matrix[1][11][1] = -1;
hayabusa_tree_matrix[1][11][2] = -1;
hayabusa_tree_matrix[1][11][3] = 15;
hayabusa_tree_matrix[1][12][0] = 8;
hayabusa_tree_matrix[1][12][1] = -1;
hayabusa_tree_matrix[1][12][2] = -1;
hayabusa_tree_matrix[1][12][3] = 13;
hayabusa_tree_matrix[1][13][0] = 12;
hayabusa_tree_matrix[1][13][1] = -1;
hayabusa_tree_matrix[1][13][2] = -1;
hayabusa_tree_matrix[1][13][3] = 6;
hayabusa_tree_matrix[1][14][0] = -1;
hayabusa_tree_matrix[1][14][1] = -1;
hayabusa_tree_matrix[1][14][2] = -1;
hayabusa_tree_matrix[1][14][3] = 15;
hayabusa_tree_matrix[1][15][0] = 11;
hayabusa_tree_matrix[1][15][1] = 14;
hayabusa_tree_matrix[1][15][2] = -1;
hayabusa_tree_matrix[1][15][3] = 7;
hayabusa_tree_matrix_is_init = true;
}
static void set_channel_info(int c, int rank, struct ncclChannel *channel)
{
channel->binTree.down[0] = hayabusa_tree_matrix[c%NUM_HAYABUSA_TREES][rank][0];
channel->binTree.down[1] = hayabusa_tree_matrix[c%NUM_HAYABUSA_TREES][rank][1];
channel->binTree.down[2] = hayabusa_tree_matrix[c%NUM_HAYABUSA_TREES][rank][2];
channel->binTree.up = hayabusa_tree_matrix[c%NUM_HAYABUSA_TREES][rank][3];
}
ncclResult_t ncclBinaryTreeHayabusaPostset(struct ncclComm* comm,
struct ncclTopoGraph* treeGraph) {
int nChannels = comm->nChannels;
hayabusa_tree_matrix_init();
for (int c=0; c<nChannels; c++) {
struct ncclChannel* channel = comm->channels+c;
set_channel_info(c, comm->localRank, channel);
}
return ncclSuccess;
}
ncclResult_t ncclTreeBasePostset(struct ncclComm* comm,
struct ncclTopoGraph* treeGraph) {
+1
View File
@@ -116,6 +116,7 @@ ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, int* treePa
ncclResult_t ncclTreeBasePostset(struct ncclComm* comm, struct ncclTopoGraph* treeGraph);
ncclResult_t ncclBinaryTreePostset(struct ncclComm* comm, struct ncclTopoGraph* treeGraph);
ncclResult_t ncclBinaryTreeHayabusaPostset(struct ncclComm* comm, struct ncclTopoGraph* treeGraph);
ncclResult_t ncclTopoTuneModel(struct ncclComm* comm, int minCompCap, int maxCompCap, struct ncclTopoGraph* treeGraph, struct ncclTopoGraph* ringGraph, struct ncclTopoGraph* collNetGraph);
#include "info.h"
+5 -1
View File
@@ -1042,7 +1042,11 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, ncclUniqueId* comm
if (comm->topo->pivotA2ANumBiRings == 3) {
NCCLCHECK(ncclTreeBasePostset(comm, &treeGraph));
NCCLCHECK(ncclBinaryTreePostset(comm, &treeGraph));
if (comm->virtualId == -1) {
NCCLCHECK(ncclBinaryTreeHayabusaPostset(comm, &treeGraph));
} else {
NCCLCHECK(ncclBinaryTreePostset(comm, &treeGraph));
}
}
free(allTopoRanks);