set MAXCHANNELS to 128

[ROCm/rccl commit: ef442f8f92]
This commit is contained in:
Nusrat Islam
2024-05-23 15:27:43 -05:00
parent 2447161fb4
commit 48821ad0d7
7 changed files with 33 additions and 12 deletions
+2 -2
View File
@@ -241,7 +241,7 @@ __forceinline__ __device__ void ncclKernelMain(struct ncclDevComm* comm, struct
y = __popcll(channelMask.masks[i] & ((1ull<<x)-1));
y = total + y;
if (blockIdx.x == y) {
ncclShmem.channelId = y;
ncclShmem.channelId = x + total;
break;
}
}
@@ -251,7 +251,7 @@ __forceinline__ __device__ void ncclKernelMain(struct ncclDevComm* comm, struct
y = __popcll(channelMask.masks[i] & ((1ull<<x)-1));
y = y + total;
if (blockIdx.x == y) {
ncclShmem.channelId = y;
ncclShmem.channelId = x + total;
break;
}
}
+1 -1
View File
@@ -618,7 +618,7 @@ static void finishPlan(struct ncclKernelPlan* plan) {
int channelCount = 0;
//uint64_t channelMask = 0;
struct channelMasks channelMask;
for (int i =0; i < 4; i++) {
for (int i =0; i < MAXCHANNELS/64; i++) {
channelMask.masks[i] = 0;
}
bool hasProxyOps = false;
+3 -3
View File
@@ -908,7 +908,7 @@ ncclResult_t ncclTopoComputeP2pChannels(struct ncclComm* comm) {
comm->p2pnChannelsPerPeer = (ncclParamNChannelsPerPeer() == -2 ? nextPow2(minChannels) : ncclParamNChannelsPerPeer());
// Doubling P2P channels per peer on single node
if (comm->topo->nodes[GPU].count == comm->topo->nRanks && IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx94")) comm->p2pnChannelsPerPeer *= 2;
comm->p2pnChannels = nextPow2(comm->p2pnChannels);
comm->p2pnChannels = std::min(nextPow2(comm->p2pnChannels), 4*CHANNEL_LIMIT);
}
// Init channels that weren't used so far
@@ -918,7 +918,7 @@ ncclResult_t ncclTopoComputeP2pChannels(struct ncclComm* comm) {
// fill the whole space of nChannels. To do so we mirror the bits in the
// nChannels space.
for (int c=0; c<comm->p2pnChannels; c++) {
comm->p2pChannels[c] = mirrorBits(c, comm->p2pnChannels);
comm->p2pChannels[c] = mirrorBits(c, comm->p2pnChannels);
}
return ncclSuccess;
}
@@ -950,4 +950,4 @@ int ncclTopoPathAllNVLink(struct ncclTopoSystem* system) {
}
}
return minPath >= PATH_PIX ? 0 : 1;
}
}
+1 -1
View File
@@ -225,7 +225,7 @@ static void groupCleanup(struct ncclComm** groupCommHeadPtr, struct ncclComm** g
for (int i = 0; i < comm->nRanks; i++) {
comm->tasks.peers[i].sendSeen = false;
comm->tasks.peers[i].recvSeen = false;
for (int j = 0; j < 4; j++) {
for (int j = 0; j < MAXCHANNELS/64; j++) {
comm->connectSend[i].masks[j] = 0UL;
comm->connectRecv[i].masks[j] = 0UL;
}
+1 -1
View File
@@ -173,7 +173,7 @@ struct ncclNvlsMcHandleList {
};
struct channelMasks {
uint64_t masks[4];
uint64_t masks[MAXCHANNELS/64];
};
struct ncclKernelPlan {
+2 -1
View File
@@ -61,7 +61,8 @@ union ncclLLFifoLine {
};
#define WARP_SIZE warpSize
#define MAXCHANNELS 256
#define MAXCHANNELS 128
#define CHANNEL_LIMIT 16
#define NCCL_MAX_NTHREADS 256
+23 -3
View File
@@ -106,6 +106,9 @@ ncclResult_t ncclTransportP2pSetup(struct ncclComm* comm, struct ncclTopoGraph*
timeLast = timeStart; // struct copy
bool timeReported = false;
int count = 0;
int num = MAXCHANNELS/64;
NCCLCHECKGOTO(ncclStrongStreamAcquireUncaptured(&comm->sharedRes->hostStream), ret, fail);
// First time initialization
for (int i=1; i<comm->nRanks; i++) {
@@ -123,7 +126,16 @@ ncclResult_t ncclTransportP2pSetup(struct ncclComm* comm, struct ncclTopoGraph*
// The next M entries contain sendData, connection information for send connections
// It's not guaranteed that each entry of data has the same number of total or send/recv specific connections
int p = i-(done+1);
if ((recvMask.masks[0]) || (sendMask.masks[0])) NCCLCHECK(ncclCalloc(data+p, 2*MAXCHANNELS));
count = 0;
for (int j = 0; j < num; j++) {
if ((recvMask.masks[j]) || (sendMask.masks[j])) {
count++;
}
}
//if ((recvMask.masks[0]) || (sendMask.masks[0])) NCCLCHECK(ncclCalloc(data+p, 2*MAXCHANNELS));
if (count) NCCLCHECK(ncclCalloc(data+p, 2*MAXCHANNELS));
recvData[p] = data[p];
int sendChannels = 0, recvChannels = 0;
int type;
@@ -220,7 +232,15 @@ ncclResult_t ncclTransportP2pSetup(struct ncclComm* comm, struct ncclTopoGraph*
}
TIME_STOP(4);
}
if (sendMask.masks[0] || recvMask.masks[0]) {
count = 0;
for (int j = 0; j < num; j++) {
if ((recvMask.masks[j]) || (sendMask.masks[j])) {
count++;
}
}
//if (sendMask.masks[0] || recvMask.masks[0]) {
if (count) {
free(data[p]);
data[p] = NULL;
}
@@ -262,7 +282,7 @@ ncclResult_t ncclTransportP2pSetup(struct ncclComm* comm, struct ncclTopoGraph*
int sendPeer = (comm->rank + i) % comm->nRanks;
int flag = 0;
for (int j = 0; j < 4; j++) {
for (int j = 0; j < MAXCHANNELS/64; j++) {
if (recvPeer != sendPeer) {
if (comm->connectSend[sendPeer].masks[j] != 0UL)
NCCLCHECKGOTO(bootstrapSend(comm->bootstrap, sendPeer, bootstrapTag, &flag, sizeof(int)), ret, fail);