Files
rocm-systems/src/include/info.h
T
Sylvain Jeaugey 3c223c105a 2.12.7-1
Add network communication through another GPU connected with NVLink
(PXN).
Add aggregation of messages coming from different local GPUs through
PXN and going to the same destination.
Add new v5 plugin API with grouped receives and tags.
Add compat for v4 plugins.
Add naming of NCCL threads to help debugging.
Fix NVLink detection and avoid data corruption when some NVLinks are
down.
Add support for Relaxed Ordering for IB.
Add profiling and timing infrastructure.
2022-03-02 20:48:56 +01:00

58 lines
1.2 KiB
C

/*************************************************************************
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#ifndef NCCL_INFO_H_
#define NCCL_INFO_H_
#include "nccl.h"
#include "devcomm.h"
#include "collectives.h"
// Communication pattern recorded for an operation (see ncclInfo::pattern).
// The underlying type is fixed to uint8_t, so a value occupies one byte.
// Enumerator values are written out explicitly; they are the same 0..9
// numbering the compiler would assign implicitly.
// NOTE(review): the meaning of each pattern is defined by the code that
// consumes it, which is not visible in this header.
typedef enum : uint8_t {
  ncclPatternRing           = 0,
  ncclPatternRingTwice      = 1,
  ncclPatternPipelineFrom   = 2,
  ncclPatternPipelineTo     = 3,
  ncclPatternTreeUp         = 4,
  ncclPatternTreeDown       = 5,
  ncclPatternTreeUpDown     = 6,
  ncclPatternCollTreeUpDown = 7,
  ncclPatternSend           = 8,
  ncclPatternRecv           = 9
} ncclPattern_t;
// Carries the information describing one NCCL call as it is handed from
// function to function. Member order is significant: it fixes the layout and
// the order used by brace (aggregate) initialization, so do not reorder.
struct ncclInfo {
  // Operation identifier and its name string.
  ncclFunc_t coll;
  const char* opName;

  // Arguments of the NCCL collective.
  const void* sendbuff;
  void* recvbuff;
  size_t count;
  ncclDataType_t datatype;
  ncclRedOp_t op;
  int root;  // for p2p operations this holds the peer
  ncclComm_t comm;
  cudaStream_t stream;

  // Algorithm details.
  int chunkSteps, sliceSteps;

  // Members whose values are computed later.
  ncclDevRedOpFull opFull;
  int algorithm, protocol;
  ncclPattern_t pattern;
  int nChannels, nThreads;
  size_t nBytes;
  int nstepsPerLoop, nchunksPerLoop;
  int chunkSize, channelId;
};
#endif