Files
rocm-systems/src/include/register.h
T
Sylvain Jeaugey ab2b89c4c3 2.21.5-1
Add support for IB SHARP 1PPN operation with user buffers.
Improve support for MNNVL, add NVLS support and multi-clique support.
 * Detect the NVLS clique through NVML
 * Exchange XML between peers in the same NVLS clique and fuse XMLs
   before creating the topology graph.
 * Rework bootstrap allgather algorithms to allow for large allgather
   operations intra-node (XML exchange).
Net/IB: add support for dynamic GID detection.
 * Automatically select the RoCEv2/IPv4 interface by default. Allow
   selecting IPv6 or even the network/mask.
Reduce NVLS memory usage.
 * Add stepSize as property of a connection to allow for different
   sizes on different peers; set it to 128K for NVLink SHARP.
Improve tuner loading
 * Look for more paths, be more consistent with the network device
   plugin.
 * Also search for tuner support inside the net plugin.
Improve tuner API
 * Add context to support multi-device per process.
Add magic number around comm object to detect comm corruption.
 * Add some basic checks around communicators so that we can report a
   problem when a communicator gets corrupted or a wrong comm pointer
   is passed to NCCL.
Fix net/IB error path. Github PR #1164
Fix collnet rail mapping with split comm.
Fix packet reordering issue causing bootstrap mismatch
 * Use a different tag in ncclTransportP2pSetup for the connectInfo
   exchange and the following barrier.
Fix hang when crossNic is inconsistent between ranks.
Fix minCompCap/maxCompCap computation. Github issue #1184
2024-04-02 01:53:21 -07:00

47 lines
1.0 KiB
C

#ifndef NCCL_REGISTER_H_
#define NCCL_REGISTER_H_
// Registration status flags. Every constant occupies a distinct single bit,
// so any combination of them can be OR-ed into one integer mask.
// NOTE(review): presumably these are the values held in ncclReg::state — confirm.
enum {
  NET_REG_COMPLETE     = 1 << 0,  // 0x01
  NVLS_REG_COMPLETE    = 1 << 1,  // 0x02
  NVLS_REG_POSSIBLE    = 1 << 2,  // 0x04
  NVLS_REG_NO_SUPPORT  = 1 << 3,  // 0x08
  COLLNET_REG_COMPLETE = 1 << 4   // 0x10
};
// One registration record. Members are grouped by the subsystem that uses
// them (common, net, NVLS, collnet); their order is significant for the
// struct layout and must not be rearranged.
struct ncclReg {
  // ---- common attributes ----
  size_t pages;
  int refs;
  uintptr_t addr;
  uint32_t state;  // NOTE(review): presumably a mask of the *_REG_* flags — confirm

  // ---- net reg ----
  int nDevs;
  int devs[MAXCHANNELS];
  void **handles;

  // ---- nvls reg ----
  uintptr_t baseAddr;
  size_t baseSize;
  CUdeviceptr regAddr;
  size_t regSize;
  int dev;
  CUmemGenericAllocationHandle mcHandle;
  // Used to check whether NVLS buffers match among intra-node ranks.
  uintptr_t caddrs[NCCL_MAX_LOCAL_RANKS];

  // ---- collnet reg ----
  void *collnetHandle;
  struct ncclProxyConnector *proxyconn;
};
// Container of ncclReg records plus per-channel comm pointers.
// NOTE(review): capacity/population look like allocated vs. used slot
// counts for `slots` — confirm against the implementation.
struct ncclRegCache {
  struct ncclReg **slots;
  int capacity;
  int population;
  uintptr_t pageSize;
  void *sComms[MAXCHANNELS];
  void *rComms[MAXCHANNELS];
};
// Registration entry points; both take the communicator and return an
// ncclResult_t status code. Their definitions are not in this header.

// NOTE(review): presumably releases the registration state held by `comm`
// — confirm against the implementation.
ncclResult_t ncclRegCleanup(struct ncclComm* comm);
// Takes a buffer described by `data` and `size`, and an output pointer `reg`.
// NOTE(review): presumably looks up the registration covering that buffer and
// stores it in *reg; the value of *reg when nothing matches is not visible
// here — confirm against the implementation.
ncclResult_t ncclRegFind(struct ncclComm* comm, const void* data, size_t size, struct ncclReg** reg);
#endif