Add support for CUDA graphs.
Fuse BCM Gen4 switches to avoid suboptimal performance on some platforms. Issue #439.
Fix bootstrap issue caused by connection reordering.
Fix CPU locking block.
Improve CollNet algorithm.
Improve performance on DGX A100 for communicators with only one GPU per node.
This commit is contained in:
Sylvain Jeaugey
2021-04-12 16:00:11 -07:00
vanhempi 911d61f214
commit a46ea10583
43 muutettua tiedostoa jossa 2687 lisäystä ja 1244 poistoa
+3 -3
Näytä tiedosto
@@ -1,5 +1,5 @@
/*************************************************************************
* Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2021, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
@@ -14,8 +14,8 @@ ncclResult_t bootstrapCreateRoot(ncclUniqueId* commId, bool idFromEnv);
ncclResult_t bootstrapGetUniqueId(ncclUniqueId* out);
ncclResult_t bootstrapInit(ncclUniqueId* id, int rank, int nranks, void** commState);
ncclResult_t bootstrapAllGather(void* commState, void* allData, int size);
ncclResult_t bootstrapSend(void* commState, int peer, void* data, int size);
ncclResult_t bootstrapRecv(void* commState, int peer, void* data, int size);
ncclResult_t bootstrapSend(void* commState, int peer, int tag, void* data, int size);
ncclResult_t bootstrapRecv(void* commState, int peer, int tag, void* data, int size);
ncclResult_t bootstrapRemAlloc(size_t size, int rank, void* commState, int* id, cudaIpcMemHandle_t* ipc, void** ptr);
ncclResult_t bootstrapRemFree(int id, int rank, void* commState);
ncclResult_t bootstrapClose(void* commState);