doubling debug buffer size with increased channels

[ROCm/rccl commit: 0634c5c8e1]
Cette révision appartient à :
Nusrat Islam
2024-05-29 12:21:24 -05:00
Parent 48821ad0d7
révision b34fd115a1
3 fichiers modifiés avec 5 ajouts et 5 suppressions
+1 -1
Voir le fichier
@@ -169,7 +169,7 @@ void ncclDebugLog(ncclDebugLogLevel level, unsigned long flags, const char *file
cudaGetDevice(&cudaDev);
}
-char buffer[2048];
+char buffer[4096];
size_t len = 0;
if (level == NCCL_LOG_WARN) {
len = snprintf(buffer, sizeof(buffer), "\n%s:%d:%d [%d] %s:%d NCCL WARN ",
+2 -2
Voir le fichier
@@ -1532,7 +1532,7 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, struct ncclComm* p
TRACE(NCCL_INIT, "rank %d nranks %d - BUILT %d TREES/RINGS", rank, nranks, comm->nChannels);
-char line[2048];
+char line[4096];
line[0]='\0';
for (int c=0; c<comm->nChannels; c++) {
struct ncclTree* tree = &comm->channels[c].tree;
@@ -1541,7 +1541,7 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, struct ncclComm* p
INFO(NCCL_GRAPH, "Ring %d : %d -> %d -> %d comm %p nRanks %02d busId %lx", c, comm->channels[c].ring.prev,
comm->rank, comm->channels[c].ring.next, comm, comm->nRanks, comm->busId);
}
-line[2047] = '\0';
+line[4095] = '\0';
INFO(NCCL_INIT, "Trees%s comm %p nRanks %02d busId %lx", line, comm, comm->nRanks, comm->busId);
NCCLCHECKGOTO(computeBuffSizes(comm), ret, fail);
+2 -2
Voir le fichier
@@ -1089,7 +1089,7 @@ ncclResult_t initTransportsRank_3(struct ncclComm* comm, struct allGatherInfo *a
TRACE(NCCL_INIT, "rank %d nranks %d - BUILT %d TREES/RINGS", rank, nranks, comm->nChannels);
-char line[2048];
+char line[4096];
line[0]='\0';
for (int c=0; c<comm->nChannels; c++) {
struct ncclTree* tree = &comm->channels[c].tree;
@@ -1098,7 +1098,7 @@ ncclResult_t initTransportsRank_3(struct ncclComm* comm, struct allGatherInfo *a
INFO(NCCL_GRAPH, "Ring %d : %d -> %d -> %d comm %p nRanks %02d busId %lx", c, comm->channels[c].ring.prev,
comm->rank, comm->channels[c].ring.next, comm, comm->nRanks, comm->busId);
}
-line[2047] = '\0';
+line[4095] = '\0';
INFO(NCCL_INIT, "Trees%s comm %p nRanks %02d busId %lx", line, comm, comm->nRanks, comm->busId);
//NCCLCHECKGOTO(computeBuffSizes(comm), ret, fail);