Files
rocm-systems/src/include/info.h
T
Sylvain Jeaugey a46ea10583 2.9.6-1
Add support for CUDA graphs.
Fuse BCM Gen4 switches to avoid suboptimal performance on some platforms. Issue #439.
Fix bootstrap issue caused by connection reordering.
Fix CPU locking block.
Improve CollNet algorithm.
Improve performance on DGX A100 for communicators with only one GPU per node.
2021-04-12 16:00:46 -07:00

58 строки
1.2 KiB
C

/*************************************************************************
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#ifndef NCCL_INFO_H_
#define NCCL_INFO_H_
#include "nccl.h"
#include "devcomm.h"
// Pattern identifiers stored in ncclInfo::pattern.
// The numeric values are written out explicitly; they are the same values the
// compiler assigns implicitly (0-based, in declaration order).
typedef enum {
  ncclPatternRing           = 0,
  ncclPatternRingTwice      = 1,
  ncclPatternPipelineFrom   = 2,
  ncclPatternPipelineTo     = 3,
  ncclPatternTreeUp         = 4,
  ncclPatternTreeDown       = 5,
  ncclPatternTreeUpDown     = 6,
  ncclPatternCollTreeUpDown = 7
} ncclPattern_t;
// Used to pass NCCL call information between functions.
//
// Plain aggregate: it declares no constructors or member functions, so every
// field holds whatever the code that fills the struct stored there.
// The fields fall into three groups, marked by the section comments below:
// the collective-call arguments, algorithm step settings, and values that are
// computed after the struct is first populated.
//
// NOTE(review): size_t, ssize_t, uint32_t and cudaStream_t are not declared in
// this header; presumably they arrive through "nccl.h" / "devcomm.h" — confirm.
struct ncclInfo {
// Identifies the operation: a function id plus a C-string name.
// NOTE(review): opName is presumably used for logging/tracing — confirm.
ncclFunc_t coll;
const char* opName;
// NCCL Coll Args
// Read-only input buffer and writable output buffer.
const void* sendbuff;
void* recvbuff;
// presumably the number of elements of `datatype`, not bytes — TODO confirm
size_t count;
ncclDataType_t datatype;
ncclRedOp_t op;
// NOTE(review): presumably only meaningful for rooted operations — confirm
int root;
ncclComm_t comm;
cudaStream_t stream;
// Algorithm details
int chunkSteps;
int sliceSteps;
// Computed later
// (Where and when these are filled in is not visible from this header.)
int algorithm;
int protocol;
// One of the ncclPattern_t enumerators declared in this header.
ncclPattern_t pattern;
int nChannels;
int nThreads;
// presumably the total size of the operation in bytes — TODO confirm
size_t nBytes;
int nstepsPerLoop;
int nchunksPerLoop;
// Signed byte counts (ssize_t), unlike the unsigned count/nBytes above.
ssize_t sendbytes;
ssize_t recvbytes;
int recvChunkSize;
int sendChunkSize;
uint32_t delta;
int channelId;
};
#endif