Files
rocm-systems/src/misc/shmutils.cc
T
Sylvain Jeaugey d97a32fac8 2.18.1-1
Add support for IB SHARP to NVLS (NVLink SHARP algorithm).
Add NVLS+Tree algorithm.
Add support for memory management using cuMem* functions.
Use all NICs for Send/Receive operations on systems with more than
one NIC per GPU (#804).
Add ncclCommSplit primitive, with resource sharing option in config.
Fix alltoallv hang (#788)
Increase number of channels on H100 when we're not limited by NVLink.
Improve error reporting in case of IB failure, printing local and
remote ID (#779).
Add build option to allow compilation against RDMA includes instead
of dynamically loading IB verbs symbols (#802).
Fix context creation for progress thread (#803).
NET/IB: add option to use multiple QPs in round-robin mode.
Fix tree performance issue when NVB is disabled on HCM topologies.
2023-04-18 03:58:25 -07:00

164 lines
5.4 KiB
C++

/*************************************************************************
* Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#include "shm.h"
#include "checks.h"
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <utils.h>
/* Internal bookkeeping behind an opaque ncclShmHandle_t: everything needed to
 * unmap, close and (for the creator) unlink one shared memory segment. */
struct shmHandleInternal {
/* File descriptor of the file backing the segment. */
int fd;
/* Heap-allocated copy of the segment path; only set on the creating side, NULL for a peer that attached. */
char* shmPath;
/* Host address of the mapping. */
char* shmPtr;
/* Device-side pointer for the mapping, or NULL when none was requested. */
void* devShmPtr;
/* Payload size requested by the caller, in bytes. */
size_t shmSize;
/* Size actually mapped: shmSize plus the trailing reference counter. */
size_t realShmSize;
/* Location of the shared reference counter, which lives in the mapping right after the shmSize payload bytes. */
int* refcount;
};
/* Fill in `handle` from the pieces gathered while opening a segment.
 *
 *  fd           descriptor of the backing file (may be -1 on a failed open)
 *  shmPath      segment path; copied into the handle only when `create` is set
 *  shmSize      payload size in bytes
 *  realShmSize  mapped size (payload plus the trailing reference counter)
 *  hptr         host mapping, or NULL / MAP_FAILED when there is no mapping
 *  dptr         device pointer for the mapping, or NULL
 *  create       true on the creating side, false on an attaching peer
 *
 * When there is no usable mapping, shmPtr and refcount are stored as NULL so
 * nothing downstream reads or writes through a made-up address. */
static void shmHandleInit(int fd, char* shmPath, size_t shmSize, size_t realShmSize, char* hptr, void* dptr, bool create, struct shmHandleInternal* handle) {
  /* mmap() reports failure with MAP_FAILED, not NULL: accept neither as a mapping. */
  const bool mapped = (hptr != NULL && hptr != (char*)MAP_FAILED);

  handle->fd = fd;
  handle->shmPtr = mapped ? hptr : NULL;
  handle->devShmPtr = dptr;
  handle->shmSize = shmSize;
  handle->realShmSize = realShmSize;
  /* The counter sits right behind the payload; without a mapping it does not exist. */
  handle->refcount = mapped ? (int*)(hptr + shmSize) : NULL;
  handle->shmPath = NULL;

  if (create) {
    const size_t pathBytes = strlen(shmPath) + 1; /* include the terminating NUL */
    handle->shmPath = (char*)malloc(pathBytes);
    /* On allocation failure the path simply stays NULL instead of crashing in memcpy. */
    if (handle->shmPath != NULL) memcpy(handle->shmPath, shmPath, pathBytes);
    /* Zero the payload only; the reference counter behind it is left untouched. */
    if (mapped) memset(hptr, 0, shmSize);
  }
  return;
}
/* Create (refcount > 0) or attach to (refcount <= 0) a shared memory segment.
 *
 *  shmPath    path of the backing file. When creating with an empty string, a
 *             unique "/dev/shm/nccl-XXXXXX" name is generated and written back
 *             into this buffer, so it must have room for it.
 *  shmSize    payload size in bytes; sizeof(int) extra bytes are mapped behind
 *             it to hold the shared reference counter.
 *  shmPtr     out: host address of the mapping (NULL on failure).
 *  devShmPtr  optional out: when non-NULL the mapping is registered with CUDA
 *             and the matching device pointer is returned here.
 *  refcount   > 0: create the segment and store this value as its counter;
 *             otherwise attach, decrement the counter, and unlink the file
 *             once the counter reaches 0.
 *  handle     out: opaque handle for ncclShmClose/ncclShmUnlink (NULL on failure).
 *
 * Returns ncclSuccess, or an error code after undoing whatever was set up. */
ncclResult_t ncclShmOpen(char* shmPath, size_t shmSize, void** shmPtr, void** devShmPtr, int refcount, ncclShmHandle_t* handle) {
  int fd = -1;
  char* hptr = NULL;
  void* dptr = NULL;
  ncclResult_t ret = ncclSuccess;
  struct shmHandleInternal* tmphandle;
  bool create = refcount > 0 ? true : false;
  const size_t refSize = sizeof(int); /* extra sizeof(int) bytes for reference count */
  const size_t realShmSize = shmSize + refSize;

  *handle = *shmPtr = NULL; /* assume shmPtr and handle always set correctly by users. */
  EQCHECKGOTO(tmphandle = (struct shmHandleInternal*)calloc(1, sizeof(struct shmHandleInternal)), NULL, ret, fail);
  if (create) {
    /* refcount > 0 means the caller tries to allocate a shared memory. This shared memory segment will have
     * refcount references; when the peer attaches, it should pass -1 to reduce one reference count. When it
     * goes down to 0, unlink should be called in order to delete shared memory file. */
    if (shmPath[0] == '\0') {
      sprintf(shmPath, "/dev/shm/nccl-XXXXXX");
      /* Check mkstemp like open below, so a failure is reported here rather than as a confusing ftruncate error. */
      SYSCHECKGOTO(fd = mkstemp(shmPath), ret, fail);
    } else {
      SYSCHECKGOTO(fd = open(shmPath, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR), ret, fail);
    }

    if (ftruncate(fd, realShmSize) != 0) {
      WARN("Error: failed to extend %s to %ld bytes", shmPath, realShmSize);
      ret = ncclSystemError;
      goto fail;
    }
    INFO(NCCL_ALLOC, "Allocated %ld bytes of shared memory in %s", realShmSize, shmPath);
  } else {
    SYSCHECKGOTO(fd = open(shmPath, O_RDWR, S_IRUSR | S_IWUSR), ret, fail);
  }

  hptr = (char*)mmap(NULL, realShmSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  if (hptr == MAP_FAILED) {
    WARN("Could not map %s size %zi, error: %s", shmPath, realShmSize, strerror(errno));
    ret = ncclSystemError;
    /* MAP_FAILED is (void*)-1, not NULL: reset it so cleanup never treats it as a mapping. */
    hptr = NULL;
    goto fail;
  }

  if (create) {
    *(int*)(hptr + shmSize) = refcount;
  } else {
    int remref = ncclAtomicRefCountDecrement((int*)(hptr + shmSize));
    if (remref == 0) {
      /* the last peer has completed attachment, it should unlink the shm mem file. */
      if (unlink(shmPath) != 0) {
        WARN("unlink shared memory %s failed, error: %s", shmPath, strerror(errno));
      }
    }
  }

  if (devShmPtr) {
    CUDACHECKGOTO(cudaHostRegister((void*)hptr, realShmSize, cudaHostRegisterMapped), ret, fail);
    CUDACHECKGOTO(cudaHostGetDevicePointer(&dptr, (void*)hptr, 0), ret, fail);
  }

  shmHandleInit(fd, shmPath, shmSize, realShmSize, hptr, dptr, create, tmphandle);

exit:
  *shmPtr = hptr;
  if (devShmPtr) *devShmPtr = dptr;
  *handle = (ncclShmHandle_t)tmphandle;
  return ret;
fail:
  WARN("Error while %s shared memory segment %s (size %ld)", create ? "creating" : "attaching to", shmPath, shmSize);
  if (tmphandle) {
    if (hptr != NULL) {
      /* A mapping exists, so the handle (including its reference counter) is
       * fully describable and the regular close path can tear it down. */
      shmHandleInit(fd, shmPath, shmSize, realShmSize, hptr, dptr, create, tmphandle);
      ncclShmClose((ncclShmHandle_t)tmphandle);
    } else {
      /* No mapping means no reference counter to consult: release the file by
       * hand instead of building a handle that points at unmapped memory. */
      if (fd >= 0) {
        close(fd);
        /* Remove the file this failed creation left behind. */
        if (create && unlink(shmPath) != 0) {
          WARN("unlink shared memory %s failed, error: %s", shmPath, strerror(errno));
        }
      }
      free(tmphandle);
    }
    tmphandle = NULL;
  }
  hptr = NULL;
  dptr = NULL;
  goto exit;
}
/* Release everything held by `handle`: close the file, unlink it if this side
 * created it and references are still outstanding, unregister and unmap the
 * memory, and free the handle. A NULL handle is a no-op.
 *
 * Teardown is best effort: every step is attempted even if an earlier one
 * failed. Returns ncclSuccess, or the error of the last step that failed. */
ncclResult_t ncclShmClose(ncclShmHandle_t handle) {
  ncclResult_t ret = ncclSuccess;
  struct shmHandleInternal* tmphandle = (struct shmHandleInternal*)handle;
  if (tmphandle == NULL) return ret;

  /* The reference counter lives inside the mapping, so it may only be read
   * when the handle actually carries one (neither NULL nor a failed mmap). */
  const bool mapped = (tmphandle->shmPtr != NULL && tmphandle->shmPtr != (char*)MAP_FAILED);

  if (tmphandle->fd >= 0) {
    close(tmphandle->fd);
    if (tmphandle->shmPath != NULL && mapped && *tmphandle->refcount > 0) {
      if (unlink(tmphandle->shmPath) != 0) {
        WARN("unlink shared memory %s failed, error: %s", tmphandle->shmPath, strerror(errno));
        ret = ncclSystemError;
      }
    }
  }
  /* The path copy is owned by the handle whether or not a descriptor was ever opened. */
  free(tmphandle->shmPath);

  if (mapped) {
    /* Record an unregister failure but keep going: returning here would leak
     * both the mapping and the handle. */
    if (tmphandle->devShmPtr && cudaHostUnregister(tmphandle->shmPtr) != cudaSuccess) {
      WARN("cudaHostUnregister of shared memory %p failed", tmphandle->shmPtr);
      ret = ncclUnhandledCudaError;
    }
    if (munmap(tmphandle->shmPtr, tmphandle->realShmSize) != 0) {
      WARN("munmap of shared memory %p size %ld failed, error: %s", tmphandle->shmPtr, tmphandle->realShmSize, strerror(errno));
      ret = ncclSystemError;
    }
  }
  free(tmphandle);
  return ret;
}
/* Remove the file backing the segment without closing the handle.
 *
 * Only acts when the handle still carries a path; a NULL handle or a handle
 * without a path returns ncclSuccess and does nothing. The stored path is
 * freed and cleared afterwards, whether or not unlink() succeeded.
 * Returns ncclSystemError when unlink() fails. */
ncclResult_t ncclShmUnlink(ncclShmHandle_t handle) {
  struct shmHandleInternal* shm = (struct shmHandleInternal*)handle;

  /* Nothing to remove without a handle or without a recorded path. */
  if (!shm || shm->shmPath == NULL) return ncclSuccess;

  ncclResult_t status = ncclSuccess;
  const int rc = unlink(shm->shmPath);
  if (rc != 0) {
    WARN("unlink shared memory %s failed, error: %s", shm->shmPath, strerror(errno));
    status = ncclSystemError;
  }

  /* Drop the path so it is not unlinked or freed a second time. */
  free(shm->shmPath);
  shm->shmPath = NULL;
  return status;
}