f9c3dc251e
Add local user buffer registration for NVLink SHARP. Add tuning plugin support. Increase net API to v7 to allow for device-side packet reordering; remove support for v4 plugins. Add support for RoCE ECE. Add support for C2C links. Better detect SHM allocation failures to avoid crash with Bus Error. Fix missing thread unlocks in bootstrap (Fixes #936). Disable network flush by default on H100. Move device code from src/collectives/device to src/device.
39 строки
1.0 KiB
C
39 строки
1.0 KiB
C
/*
 * Copyright (c) 2016-2023, NVIDIA CORPORATION. All rights reserved.
 *
 * See COPYRIGHT for license information
 */
|
|
|
|
#ifndef NCCL_IPCSOCKET_H
|
|
#define NCCL_IPCSOCKET_H
|
|
|
|
#include "nccl.h"
|
|
#include <stdio.h>
|
|
#include <fcntl.h>
|
|
#include <sys/mman.h>
|
|
#include <unistd.h>
|
|
#include <errno.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/types.h>
|
|
#include <sys/socket.h>
|
|
#include <memory.h>
|
|
#include <sys/un.h>
|
|
#include <inttypes.h>
|
|
|
|
/* Capacity, in bytes, of the socketName buffer in struct ncclIpcSocket. */
#define NCCL_IPC_SOCKNAME_LEN 64

/*
 * State for one IPC socket endpoint, operated on by the ncclIpcSocket*
 * functions declared in this header.
 *
 * NOTE(review): the implementation is not visible from this header; the
 * field descriptions below are inferred from names and types and should be
 * confirmed against the .cc file.
 */
struct ncclIpcSocket {
  /* Descriptor of the underlying socket (presumably what
   * ncclIpcSocketGetFd() reports -- confirm). */
  int fd;
  /* Fixed-size buffer for the socket's name; holds at most
   * NCCL_IPC_SOCKNAME_LEN bytes. */
  char socketName[NCCL_IPC_SOCKNAME_LEN];
  /* Caller-provided flag, same type as the abortFlag argument of
   * ncclIpcSocketInit(). Presumably polled so blocking operations can be
   * abandoned -- TODO confirm. May be NULL-checked by the implementation;
   * verify before relying on it. */
  volatile uint32_t *abortFlag;
};
|
|
|
|
/*
 * IPC socket API.
 *
 * Every function returns an ncclResult_t (declared in nccl.h).
 * NOTE(review): the implementations are not visible from this header, so the
 * per-function notes below describe the declared interface only; semantics
 * marked "presumably" must be confirmed against the implementation.
 */

/*
 * Set up *handle for use by the other ncclIpcSocket* calls.
 *   handle    - caller-allocated socket state to initialise.
 *   rank      - presumably the local rank identifying this endpoint.
 *   hash      - presumably combined with rank to form a unique socket
 *               name -- TODO confirm.
 *   abortFlag - same type as the abortFlag field of struct ncclIpcSocket;
 *               presumably stored there.
 */
ncclResult_t ncclIpcSocketInit(struct ncclIpcSocket *handle, int rank, uint64_t hash, volatile uint32_t *abortFlag);

/* Release the resources associated with handle (presumably closes the
 * socket -- confirm). */
ncclResult_t ncclIpcSocketClose(struct ncclIpcSocket *handle);

/* Report a file descriptor for handle through the out-parameter *fd
 * (presumably the handle's own socket descriptor -- confirm). */
ncclResult_t ncclIpcSocketGetFd(struct ncclIpcSocket *handle, int *fd);

/* Receive a file descriptor over handle and store it in *fd. */
ncclResult_t ncclIpcSocketRecvFd(struct ncclIpcSocket *handle, int *fd);

/*
 * Send file descriptor fd over handle.
 *   rank, hash - presumably identify the destination endpoint, mirroring the
 *                arguments of ncclIpcSocketInit() -- TODO confirm.
 */
ncclResult_t ncclIpcSocketSendFd(struct ncclIpcSocket *handle, const int fd, int rank, uint64_t hash);
|
|
|
|
#endif /* NCCL_IPCSOCKET_H */
|