3c223c105a
Add network communication through another GPU connected with NVLink (PXN). Add aggregation of messages coming from different local GPUs through PXN and going to the same destination. Add new v5 plugin API with grouped receives and tags. Add compat for v4 plugins. Add naming of NCCL threads to help debugging. Fix NVLink detection and avoid data corruption when some NVLinks are down. Add support for Relaxed Ordering for IB. Add profiling and timing infrastructure.
58 lines
1.2 KiB
C
58 lines
1.2 KiB
C
/*************************************************************************
 * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * See LICENSE.txt for license information
 ************************************************************************/
|
|
|
|
#ifndef NCCL_INFO_H_
|
|
#define NCCL_INFO_H_
|
|
|
|
#include "nccl.h"
|
|
#include "devcomm.h"
|
|
#include "collectives.h"
|
|
|
|
// Identifiers for the communication pattern of an operation; stored in
// ncclInfo::pattern.
//
// The underlying type is fixed to uint8_t, so a value occupies a single byte.
// No enumerator has an explicit initializer: they are numbered consecutively
// from ncclPatternRing == 0 up to ncclPatternRecv == 9, so inserting or
// reordering entries changes the value of every later one.
//
// NOTE(review): the names suggest ring, pipeline, tree, collective-tree and
// point-to-point (send/recv) traffic shapes, but the exact meaning of each
// value is defined by the code that reads the pattern, which is not visible
// in this header -- confirm there before relying on it.
typedef enum : uint8_t {
  ncclPatternRing,
  ncclPatternRingTwice,
  ncclPatternPipelineFrom,
  ncclPatternPipelineTo,
  ncclPatternTreeUp,
  ncclPatternTreeDown,
  ncclPatternTreeUpDown,
  ncclPatternCollTreeUpDown,
  ncclPatternSend,
  ncclPatternRecv
} ncclPattern_t;
|
|
|
|
// Used to pass NCCL call information between functions
|
|
struct ncclInfo {
|
|
ncclFunc_t coll;
|
|
const char* opName;
|
|
// NCCL Coll Args
|
|
const void* sendbuff;
|
|
void* recvbuff;
|
|
size_t count;
|
|
ncclDataType_t datatype;
|
|
ncclRedOp_t op;
|
|
int root; // peer for p2p operations
|
|
ncclComm_t comm;
|
|
cudaStream_t stream;
|
|
// Algorithm details
|
|
int chunkSteps;
|
|
int sliceSteps;
|
|
// Computed later
|
|
ncclDevRedOpFull opFull;
|
|
int algorithm;
|
|
int protocol;
|
|
ncclPattern_t pattern;
|
|
int nChannels;
|
|
int nThreads;
|
|
size_t nBytes;
|
|
int nstepsPerLoop;
|
|
int nchunksPerLoop;
|
|
int chunkSize;
|
|
int channelId;
|
|
};
|
|
|
|
#endif
|