2019-03-14 19:39:20 -07:00
|
|
|
/*************************************************************************
|
2021-04-12 16:00:11 -07:00
|
|
|
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
2019-03-14 19:39:20 -07:00
|
|
|
*
|
|
|
|
|
* See LICENSE.txt for license information
|
|
|
|
|
************************************************************************/
|
|
|
|
|
|
|
|
|
|
#ifndef NCCL_ALLOC_H_
|
|
|
|
|
#define NCCL_ALLOC_H_
|
|
|
|
|
|
|
|
|
|
#include "nccl.h"
|
|
|
|
|
#include "checks.h"
|
2020-01-16 16:02:42 -08:00
|
|
|
#include "align.h"
|
2019-03-14 19:39:20 -07:00
|
|
|
#include <sys/mman.h>
|
|
|
|
|
|
2020-05-12 14:40:18 -07:00
|
|
|
template <typename T>
|
2021-07-08 14:12:04 -07:00
|
|
|
static ncclResult_t ncclCudaHostCallocDebug(T** ptr, size_t nelem, const char *filefunc, int line) {
|
2020-05-12 14:40:18 -07:00
|
|
|
CUDACHECK(cudaHostAlloc(ptr, nelem*sizeof(T), cudaHostAllocMapped));
|
|
|
|
|
memset(*ptr, 0, nelem*sizeof(T));
|
2021-07-08 14:12:04 -07:00
|
|
|
INFO(NCCL_ALLOC, "%s:%d Cuda Host Alloc Size %ld pointer %p", filefunc, line, nelem*sizeof(T), *ptr);
|
2019-03-14 19:39:20 -07:00
|
|
|
return ncclSuccess;
|
|
|
|
|
}
|
2021-07-08 14:12:04 -07:00
|
|
|
#define ncclCudaHostCalloc(...) ncclCudaHostCallocDebug(__VA_ARGS__, __FILE__, __LINE__)
|
2019-03-14 19:39:20 -07:00
|
|
|
|
|
|
|
|
// Release a host buffer with cudaFreeHost.
//   ptr : address to hand to cudaFreeHost
// Returns ncclSuccess when cudaFreeHost succeeds; a failure is handled by
// CUDACHECK (defined outside this file).
static inline ncclResult_t ncclCudaHostFree(void* ptr) {
  // Capture the status first so the check below only inspects a value.
  const cudaError_t freeStatus = cudaFreeHost(ptr);
  CUDACHECK(freeStatus);
  return ncclSuccess;
}
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
2021-07-08 14:12:04 -07:00
|
|
|
static ncclResult_t ncclCallocDebug(T** ptr, size_t nelem, const char *filefunc, int line) {
|
2019-03-14 19:39:20 -07:00
|
|
|
void* p = malloc(nelem*sizeof(T));
|
|
|
|
|
if (p == NULL) {
|
|
|
|
|
WARN("Failed to malloc %ld bytes", nelem*sizeof(T));
|
|
|
|
|
return ncclSystemError;
|
|
|
|
|
}
|
|
|
|
|
memset(p, 0, nelem*sizeof(T));
|
|
|
|
|
*ptr = (T*)p;
|
2021-07-08 14:12:04 -07:00
|
|
|
INFO(NCCL_ALLOC, "%s:%d Mem Alloc Size %ld pointer %p", filefunc, line, nelem*sizeof(T), *ptr);
|
2019-03-14 19:39:20 -07:00
|
|
|
return ncclSuccess;
|
|
|
|
|
}
|
2021-07-08 14:12:04 -07:00
|
|
|
#define ncclCalloc(...) ncclCallocDebug(__VA_ARGS__, __FILE__, __LINE__)
|
2019-03-14 19:39:20 -07:00
|
|
|
|
|
|
|
|
template <typename T>
|
2021-07-08 14:12:04 -07:00
|
|
|
static ncclResult_t ncclCudaCallocDebug(T** ptr, size_t nelem, const char *filefunc, int line) {
|
2021-04-12 16:00:11 -07:00
|
|
|
// Need async stream for P2P pre-connect + CUDA Graph
|
|
|
|
|
cudaStream_t stream;
|
|
|
|
|
CUDACHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
|
2019-03-14 19:39:20 -07:00
|
|
|
CUDACHECK(cudaMalloc(ptr, nelem*sizeof(T)));
|
2021-04-12 16:00:11 -07:00
|
|
|
CUDACHECK(cudaMemsetAsync(*ptr, 0, nelem*sizeof(T), stream));
|
|
|
|
|
CUDACHECK(cudaStreamSynchronize(stream));
|
|
|
|
|
CUDACHECK(cudaStreamDestroy(stream));
|
2021-07-08 14:12:04 -07:00
|
|
|
INFO(NCCL_ALLOC, "%s:%d Cuda Alloc Size %ld pointer %p", filefunc, line, nelem*sizeof(T), *ptr);
|
2019-03-14 19:39:20 -07:00
|
|
|
return ncclSuccess;
|
|
|
|
|
}
|
2021-07-08 14:12:04 -07:00
|
|
|
#define ncclCudaCalloc(...) ncclCudaCallocDebug(__VA_ARGS__, __FILE__, __LINE__)
|
2019-03-14 19:39:20 -07:00
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
static ncclResult_t ncclCudaMemcpy(T* dst, T* src, size_t nelem) {
|
|
|
|
|
CUDACHECK(cudaMemcpy(dst, src, nelem*sizeof(T), cudaMemcpyDefault));
|
|
|
|
|
return ncclSuccess;
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-16 16:02:42 -08:00
|
|
|
// Allocate memory to be potentially ibv_reg_mr'd. This needs to be
|
|
|
|
|
// allocated on separate pages as those pages will be marked DONTFORK
|
|
|
|
|
// and if they are shared, that could cause a crash in a child process
|
2021-07-08 14:12:04 -07:00
|
|
|
static ncclResult_t ncclIbMallocDebug(void** ptr, size_t size, const char *filefunc, int line) {
|
2020-01-16 16:02:42 -08:00
|
|
|
size_t page_size = sysconf(_SC_PAGESIZE);
|
|
|
|
|
void* p;
|
|
|
|
|
int size_aligned = ROUNDUP(size, page_size);
|
|
|
|
|
int ret = posix_memalign(&p, page_size, size_aligned);
|
|
|
|
|
if (ret != 0) return ncclSystemError;
|
|
|
|
|
memset(p, 0, size);
|
|
|
|
|
*ptr = p;
|
2021-07-08 14:12:04 -07:00
|
|
|
INFO(NCCL_ALLOC, "%s:%d Ib Alloc Size %ld pointer %p", filefunc, line, size, *ptr);
|
2020-01-16 16:02:42 -08:00
|
|
|
return ncclSuccess;
|
|
|
|
|
}
|
2021-07-08 14:12:04 -07:00
|
|
|
#define ncclIbMalloc(...) ncclIbMallocDebug(__VA_ARGS__, __FILE__, __LINE__)
|
2020-01-16 16:02:42 -08:00
|
|
|
|
2019-03-14 19:39:20 -07:00
|
|
|
#endif
|