ab2b89c4c3
Add support for IB SHARP 1PPN operation with user buffers. Improve support for MNNVL, add NVLS support and multi-clique support. * Detect the NVLS clique through NVML * Exchange XML between peers in the same NVLS clique and fuse XMLs before creating the topology graph. * Rework bootstrap allgather algorithms to allow for large allgather operations intra-node (XML exchange). Net/IB: add support for dynamic GID detection. * Automatically select RoCEv2/IPv4 interface by default. Allow to select IPv6 or even the network/mask. Reduce NVLS memory usage. * Add stepSize as property of a connection to allow for different sizes on different peers; set it to 128K for NVLink SHARP. Improve tuner loading * Look for more paths, be more consistent with the network device plugin. * Also search for tuner support inside the net plugin. Improve tuner API * Add context to support multi-device per process. Add magic number around comm object to detect comm corruption. * Add some basic check around communicators so that we can report a problem when a communicator gets corrupted or a wrong comm pointer is passed to NCCL. Fix net/IB error path. Github PR #1164 Fix collnet rail mapping with split comm. Fix packet reordering issue causing bootstrap mismatch * Use a different tag in ncclTransportP2pSetup for the connectInfo exchange and the following barrier. Fix hang when crossNic is inconsistent between ranks. Fix minCompCap/maxCompCap computation. Github issue #1184
47 строки
1.0 KiB
C
47 строки
1.0 KiB
C
#ifndef NCCL_REGISTER_H_
#define NCCL_REGISTER_H_

/*
 * Registration status flags.
 *
 * Every constant occupies a distinct single bit (0x01 .. 0x10), so any
 * combination of them can be OR-ed into one mask and tested individually.
 * NOTE(review): presumably these are the bits kept in ncclReg::state --
 * confirm against the code that sets and tests that field.
 */
enum {
  NET_REG_COMPLETE     = 0x01,
  NVLS_REG_COMPLETE    = 0x02,
  NVLS_REG_POSSIBLE    = 0x04,
  NVLS_REG_NO_SUPPORT  = 0x08,
  COLLNET_REG_COMPLETE = 0x10
};
|
|
|
|
struct ncclReg {
|
|
// common attributes
|
|
size_t pages;
|
|
int refs;
|
|
uintptr_t addr;
|
|
uint32_t state;
|
|
// net reg
|
|
int nDevs;
|
|
int devs[MAXCHANNELS];
|
|
void** handles;
|
|
// nvls reg
|
|
uintptr_t baseAddr;
|
|
size_t baseSize;
|
|
CUdeviceptr regAddr;
|
|
size_t regSize;
|
|
int dev;
|
|
CUmemGenericAllocationHandle mcHandle;
|
|
uintptr_t caddrs[NCCL_MAX_LOCAL_RANKS]; /* use to check if NVLS buffers match among intra-node ranks */
|
|
// collnet reg
|
|
void* collnetHandle;
|
|
struct ncclProxyConnector* proxyconn;
|
|
};
|
|
|
|
struct ncclRegCache {
|
|
struct ncclReg **slots;
|
|
int capacity, population;
|
|
uintptr_t pageSize;
|
|
void* sComms[MAXCHANNELS];
|
|
void* rComms[MAXCHANNELS];
|
|
};
|
|
|
|
/*
 * Registration entry points. Both take the communicator they operate on and
 * report their outcome as an ncclResult_t.
 */

/* NOTE(review): presumably releases the registration state held by `comm` --
 * confirm against the definition, which is not visible in this header. */
ncclResult_t ncclRegCleanup(struct ncclComm* comm);

/* Takes a buffer (`data`, `size`) and writes its result through `reg`.
 * NOTE(review): presumably *reg receives the record covering the buffer, or
 * NULL when none matches -- confirm against the definition. */
ncclResult_t ncclRegFind(struct ncclComm* comm, const void* data, size_t size, struct ncclReg** reg);
|
|
|
|
#endif