a8908b34ee
Optimization for Tree allreduce on A100.
Improve aggregation performance.
Use shared buffers for inter-node send/recv.
Add NVTX profiling hooks.
Accelerate alltoall connections by merging communication for all
channels.
Add support for one hop communication through NVLink, for faster
send/recv communication on cubemesh topologies like DGX-1.
Improve alltoall scheduling to better balance intra/inter node
communication.
Increase send/recv parallelism by 8x, each warp sending or
receiving to a different peer.
Net: move to v4.
Net: make flush operation asynchronous to accelerate alltoall.
Net: define maximum number of requests.
Fix hang when using LL128 protocol after 2^31 steps.
Fix #379 : topology injection failing when using fewer GPUs than
described in the XML.
Fix #394 : protocol mismatch causing hangs or crashes when using
one GPU per node.
[ROCm/rccl commit: 920dbe5b35]
24 строки
1.1 KiB
C
24 строки
1.1 KiB
C
/*************************************************************************
|
|
* Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* See LICENSE.txt for license information
|
|
************************************************************************/
|
|
|
|
#ifndef NCCL_BOOTSTRAP_H_
|
|
#define NCCL_BOOTSTRAP_H_
|
|
|
|
#include "nccl.h"
|
|
|
|
/* Declared with no parameters; the outcome is reported through the
 * ncclResult_t return value.
 * NOTE(review): judging by the name, this sets up the network layer used by
 * the bootstrap code and probably has to run before the other bootstrap*
 * calls -- confirm against the implementation. */
ncclResult_t bootstrapNetInit();
|
|
/* Takes a pointer to a unique id (commId) and a boolean flag (idFromEnv);
 * returns an ncclResult_t status.
 * NOTE(review): presumably starts the root side of the bootstrap for commId,
 * with idFromEnv telling whether the id was supplied through the environment
 * -- verify in the implementation. */
ncclResult_t bootstrapCreateRoot(ncclUniqueId* commId, bool idFromEnv);
|
|
/* Takes a non-const pointer to an ncclUniqueId (out); returns an
 * ncclResult_t status.
 * NOTE(review): the parameter name suggests the id is written through `out`
 * -- confirm against the implementation. */
ncclResult_t bootstrapGetUniqueId(ncclUniqueId* out);
|
|
/* Takes a unique id, this caller's rank, the total rank count (nranks) and a
 * void** commState; returns an ncclResult_t status.
 * NOTE(review): commState looks like an output handle -- the other bootstrap*
 * prototypes in this header accept a void* commState -- but ownership and
 * lifetime are not visible here; confirm in the implementation. */
ncclResult_t bootstrapInit(ncclUniqueId* id, int rank, int nranks, void** commState);
|
|
/* Takes the opaque commState, a data buffer (allData) and an int size;
 * returns an ncclResult_t status.
 * NOTE(review): presumably an all-gather across ranks where `size` is the
 * per-rank byte count and allData holds every rank's slice -- TODO confirm
 * the buffer layout and units. */
ncclResult_t bootstrapAllGather(void* commState, void* allData, int size);
|
|
/* Takes the opaque commState, a peer index, a data pointer and an int size;
 * returns an ncclResult_t status.
 * NOTE(review): presumably sends `size` bytes from `data` to rank `peer` --
 * confirm units and blocking behavior in the implementation. */
ncclResult_t bootstrapSend(void* commState, int peer, void* data, int size);
|
|
/* Same parameter list as bootstrapSend: opaque commState, a peer index, a
 * data pointer and an int size; returns an ncclResult_t status.
 * NOTE(review): presumably receives `size` bytes from rank `peer` into
 * `data` -- confirm units and blocking behavior in the implementation. */
ncclResult_t bootstrapRecv(void* commState, int peer, void* data, int size);
|
|
/* Takes a byte count (size_t size), a rank, the opaque commState, and three
 * pointer parameters (int* id, cudaIpcMemHandle_t* ipc, void** ptr); returns
 * an ncclResult_t status.
 * NOTE(review): the three pointers look like outputs -- an allocation id
 * (bootstrapRemFree takes an int id), an IPC handle, and the resulting
 * pointer -- and the name suggests the allocation is made on behalf of, or
 * by, rank `rank`. None of that is visible here; verify in the
 * implementation. */
ncclResult_t bootstrapRemAlloc(size_t size, int rank, void* commState, int* id, cudaIpcMemHandle_t* ipc, void** ptr);
|
|
/* Takes an int id, a rank and the opaque commState; returns an ncclResult_t
 * status.
 * NOTE(review): presumably releases the allocation identified by `id` that
 * was obtained through bootstrapRemAlloc for the same rank -- confirm. */
ncclResult_t bootstrapRemFree(int id, int rank, void* commState);
|
|
/* Takes the opaque commState; returns an ncclResult_t status.
 * NOTE(review): presumably the orderly teardown of the state produced by
 * bootstrapInit; whether commState remains usable afterwards is not visible
 * here -- confirm in the implementation. */
ncclResult_t bootstrapClose(void* commState);
|
|
/* Takes the opaque commState; returns an ncclResult_t status.
 * NOTE(review): presumably the error-path counterpart of bootstrapClose; how
 * the two differ is not visible from this header -- confirm in the
 * implementation. */
ncclResult_t bootstrapAbort(void* commState);
|
|
#endif
|