From 354e0b29a697ab975c19e0dc747bb74f9148bf57 Mon Sep 17 00:00:00 2001
From: gilbertlee-amd <44450918+gilbertlee-amd@users.noreply.github.com>
Date: Thu, 30 May 2024 14:02:14 -0600
Subject: [PATCH] Addressing possible out-of-bounds mem access during channel duplication (#1193)

---
 src/graph/connect.cc | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/graph/connect.cc b/src/graph/connect.cc
index cdd0252f71..f432495c0b 100644
--- a/src/graph/connect.cc
+++ b/src/graph/connect.cc
@@ -86,7 +86,7 @@ ncclResult_t ncclTopoPreset(struct ncclComm* comm, struct ncclTopoGraph** graphs
       topoRanks->nvlsHeads[topoRanks->nvlsHeadNum++] = nvlsIntra[0];
     }
   }
-  
+
   return ncclSuccess;
 }
 
@@ -637,8 +637,13 @@ ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, int* treePa
   for (int c=0; c<nChannels; c++) {
     struct ncclChannel* channel0 = comm->channels+c;
     struct ncclChannel* channel1 = channel0+nChannels;
-    channel0->ring.prev = channel1->ring.prev = ringPrev[c*nranks+comm->rank];
-    channel0->ring.next = channel1->ring.next = ringNext[c*nranks+comm->rank];
+    channel0->ring.prev = ringPrev[c*nranks+comm->rank];
+    channel0->ring.next = ringNext[c*nranks+comm->rank];
+
+    if (c + nChannels < MAXCHANNELS) {
+      channel1->ring.prev = channel0->ring.prev;
+      channel1->ring.next = channel0->ring.next;
+    }
   }
 
   // Duplication should be complete now
@@ -699,4 +704,4 @@ ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, int* treePa
   free(nvlsHeads);
 
   return ncclSuccess;
-}
\ No newline at end of file
+}