f9c3dc251e
Add local user buffer registration for NVLink SHARP. Add tuning plugin support. Increase net API to v7 to allow for device-side packet reordering; remove support for v4 plugins. Add support for RoCE ECE. Add support for C2C links. Better detect SHM allocation failures to avoid crash with Bus Error. Fix missing thread unlocks in bootstrap (Fixes #936). Disable network flush by default on H100. Move device code from src/collectives/device to src/device.
30 regels
935 B
C
30 regels
935 B
C
/*************************************************************************
 * Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * See LICENSE.txt for license information
 ************************************************************************/
|
|
|
|
#include <stdlib.h>
|
|
|
|
#ifndef NCCL_P2P_H_
|
|
#define NCCL_P2P_H_
|
|
|
|
// Shareable-handle type selected for P2P buffers: an alias for the
// CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR constant, which is not defined in
// this header.
// NOTE(review): the places where this is consumed are not visible here;
// presumably it is passed to the cuMem export/import calls - confirm.
#define NCCL_P2P_HANDLE_TYPE CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR
|
|
|
|
// Descriptor used on the cuMem API path; it appears as the `cuDesc` member of
// ncclIpcDesc. It wraps a single 64-bit value.
typedef struct {
  uint64_t data; // Needs to hold a CUmemGenericAllocationHandle for UDS fd support
} ncclCuDesc;
|
|
|
|
typedef union {
|
|
// Legacy CUDA IPC
|
|
cudaIpcMemHandle_t devIpc;
|
|
// cuMem API support
|
|
ncclCuDesc cuDesc;
|
|
} ncclIpcDesc;
|
|
|
|
// Shareable-buffer P2P helpers. Only the declarations are visible in this
// header, so the notes below describe the signatures and hedge anything that
// depends on the implementation. All three return an ncclResult_t status.

// Takes a requested size, an IPC descriptor and an out-pointer.
// NOTE(review): presumably allocates `size` bytes, stores the new address in
// `*ptr` and fills `*ipcDesc` so a peer can import the buffer - confirm
// against the implementation.
ncclResult_t ncclP2pAllocateShareableBuffer(size_t size, ncclIpcDesc *ipcDesc, void **ptr);

// Takes the IPC descriptor of a shareable buffer.
// NOTE(review): presumably releases whatever the descriptor refers to; whether
// the buffer memory itself is freed here is not visible - confirm.
ncclResult_t ncclP2pFreeShareableBuffer(ncclIpcDesc *ipcDesc);

// Takes a communicator, a peer index, a size, an IPC descriptor and an
// out-pointer.
// NOTE(review): presumably maps the buffer described by `*ipcDesc` from peer
// `tpPeer` and stores the local device address in `*devMemPtr` - confirm.
ncclResult_t ncclP2pImportShareableBuffer(struct ncclComm *comm, int tpPeer, size_t size, ncclIpcDesc *ipcDesc, void **devMemPtr);
|
|
|
|
#endif
|