From 4da9c54d4e1d47c844e1353034189f84e8e73609 Mon Sep 17 00:00:00 2001
From: Wenkai Du <43822138+wenkaidu@users.noreply.github.com>
Date: Tue, 30 Mar 2021 09:06:39 -0700
Subject: [PATCH] Check fine grained memory on peer GPU before enabling P2P (#337)

[ROCm/rccl commit: 0c78553ee02d5ddb9fa3cee1bd509e26cba97785]
---
 projects/rccl/src/transport/p2p.cc | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/projects/rccl/src/transport/p2p.cc b/projects/rccl/src/transport/p2p.cc
index 4e41922768..1484380af5 100644
--- a/projects/rccl/src/transport/p2p.cc
+++ b/projects/rccl/src/transport/p2p.cc
@@ -92,6 +92,17 @@ ncclResult_t p2pCanConnect(int* ret, struct ncclTopoSystem* topo, struct ncclTop
 #endif
   }
 
+#if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__)
+  int dev;
+  CUDACHECK(hipGetDevice(&dev));
+  CUDACHECK(hipSetDevice(cudaDev2));
+  if (!hasFineGrainVramPcie()) {
+    *ret = 0;
+    CUDACHECK(hipSetDevice(dev));
+    return ncclSuccess;
+  }
+  CUDACHECK(hipSetDevice(dev));
+#endif
   // Check that CUDA can do P2P
   int p2p;
   if (hipDeviceCanAccessPeer(&p2p, cudaDev1, cudaDev2) != hipSuccess) {