Determine fine grained memory availability at RCCL bootstrapping (#471)

[ROCm/rccl commit: e9bf01fb7e]
This commit is contained in:
Wenkai Du
2021-11-19 08:12:53 -08:00
zatwierdzone przez GitHub
rodzic 5b442ffdc1
commit 864c6a645e
5 zmienionych plików z 14 dodań i 24 usunięć
-10
Wyświetl plik
@@ -89,16 +89,6 @@ static ncclResult_t ncclCudaMemcpy(T* dst, T* src, size_t nelem) {
return ncclSuccess;
}
// Probe for fine-grained device memory support by attempting a tiny
// hipExtMallocWithFlags(..., hipDeviceMallocFinegrained) allocation.
// Returns true when the allocation succeeds (the probe buffer is freed
// again before returning) and false when the allocation fails.
// NOTE(review): CUDACHECK is defined outside this view. If it early-returns
// an ncclResult_t on failure, that value would be implicitly converted to
// bool in this function -- confirm the intended result when hipFree fails.
static bool hasFineGrainVramPcie() {
  int *probe;
  const bool allocated =
      hipExtMallocWithFlags((void**)&probe, sizeof(int), hipDeviceMallocFinegrained) == hipSuccess;
  if (!allocated) {
    return false;
  }
  // The allocation was only a capability check; release it right away.
  CUDACHECK(hipFree(probe));
  return true;
}
// Allocate memory to be potentially ibv_reg_mr'd. This needs to be
// allocated on separate pages as those pages will be marked DONTFORK
// and if they are shared, that could cause a crash in a child process
-1
Wyświetl plik
@@ -43,7 +43,6 @@ static ncclResult_t ncclGpuGdrSupport(int* gdrSupport) {
NCCLCHECK(ncclNet->getProperties(dev, &props));
if ((props.ptrSupport & NCCL_PTR_CUDA) == 0) continue;
#if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__)
if (!hasFineGrainVramPcie()) continue;
*gdrSupport = 1;
break;
#endif
@@ -29,6 +29,7 @@ struct ncclPeerInfo {
int rank;
int cudaDev;
int gdrSupport;
bool hasFineGrain;
uint64_t hostHash;
uint64_t pidHash;
dev_t shmDev;
+12 -1
Wyświetl plik
@@ -613,7 +613,18 @@ static ncclResult_t fillInfo(struct ncclComm* comm, struct ncclPeerInfo* info, u
info->busId = comm->busId;
NCCLCHECK(ncclGpuGdrSupport(&info->gdrSupport));
// detect if fine grained memory is available on this GPU
int *ptr;
if (hipExtMallocWithFlags((void**)&ptr, sizeof(int), hipDeviceMallocFinegrained) == hipSuccess) {
CUDACHECK(hipFree(ptr));
info->hasFineGrain = true;
NCCLCHECK(ncclGpuGdrSupport(&info->gdrSupport));
}
else {
info->hasFineGrain = false;
info->gdrSupport = 0;
}
return ncclSuccess;
}
+1 -12
Wyświetl plik
@@ -56,7 +56,7 @@ int busIdToCudaDev(int64_t busId) {
/* Determine if two peers can communicate through p2p */
ncclResult_t p2pCanConnect(int* ret, struct ncclTopoSystem* topo, struct ncclTopoGraph* graph, struct ncclPeerInfo* info1, struct ncclPeerInfo* info2) {
#if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__)
if (!hasFineGrainVramPcie()) {
if (!info1->hasFineGrain || !info2->hasFineGrain) {
*ret = 0;
return ncclSuccess;
}
@@ -88,17 +88,6 @@ ncclResult_t p2pCanConnect(int* ret, struct ncclTopoSystem* topo, struct ncclTop
#endif
}
#if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__)
int dev;
CUDACHECK(hipGetDevice(&dev));
CUDACHECK(hipSetDevice(cudaDev2));
if (!hasFineGrainVramPcie()) {
*ret = 0;
CUDACHECK(hipSetDevice(dev));
return ncclSuccess;
}
CUDACHECK(hipSetDevice(dev));
#endif
// Check that CUDA can do P2P
int p2p;
if (hipDeviceCanAccessPeer(&p2p, cudaDev1, cudaDev2) != hipSuccess) {