/* Source view metadata: d7293281f3 [ROCm/rccl commit: 858b4e76eb] - 34 lines, 1.2 KiB, C */
/*************************************************************************
 * Copyright (c) 2015-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * See LICENSE.txt for license information
 ************************************************************************/
|
|
|
|
#ifndef NCCL_CHANNEL_H_
|
|
#define NCCL_CHANNEL_H_
|
|
#include "comm.h"
|
|
#include "utils.h"
|
|
|
|
#include <algorithm>
|
|
|
|
// Declaration only; the definition is not part of this header.
// Takes a communicator and a channel index and reports status as ncclResult_t.
// NOTE(review): presumably sets up channel `channelid` of `comm` - confirm against the definition.
ncclResult_t initChannel(struct ncclComm* comm, int channelid);
|
|
// Declaration only; the definition is not part of this header.
// Takes a communicator, a channel index, a second ("parent") communicator and a `share` flag;
// reports status as ncclResult_t.
// NOTE(review): presumably sets up NVLS-related state for the channel, with `parent`/`share`
// controlling reuse of the parent's resources - confirm against the definition.
ncclResult_t initNvlsChannel(struct ncclComm* comm, int channelId, struct ncclComm* parent, bool share);
|
|
// Declaration only; the definition is not part of this header.
// Takes a communicator, a channel index, a second ("parent") communicator and a `share` flag;
// reports status as ncclResult_t.
// NOTE(review): presumably sets up CollNet-related state for the channel, with `parent`/`share`
// controlling reuse of the parent's resources - confirm against the definition.
ncclResult_t initCollnetChannel(struct ncclComm* comm, int channelId, struct ncclComm* parent, bool share);
|
|
// Declaration only; the definition is not part of this header.
// Takes a channel pointer and three rank counts (regular, collnet, nvls); reports status as ncclResult_t.
// NOTE(review): presumably releases per-channel resources, with the rank counts sizing
// per-peer state - confirm against the definition.
ncclResult_t freeChannel(struct ncclChannel* channel, int nRanks, int collnetNRanks, int nvlsNRanks);
|
|
|
|
inline uint8_t ncclP2pChannelBaseForRound(struct ncclComm* comm, int p2pRound, int p2pBatchEnable = 0) {
|
|
int base;
|
|
if (comm->nNodes > 1) {
|
|
int nodeDelta = p2pRound/comm->maxLocalRanks;
|
|
int localDelta = p2pRound%comm->maxLocalRanks;
|
|
int batchSize = (comm->nNodes > 2 && p2pBatchEnable) ? NCCL_MAX_DEV_WORK_P2P_PER_BATCH : 1;
|
|
base = nodeDelta*divUp(comm->maxLocalRanks, batchSize);
|
|
base += localDelta/batchSize;
|
|
} else {
|
|
base = p2pRound;
|
|
}
|
|
return base & 0xff;
|
|
}
|
|
|
|
#endif
|