From 5fdf2edd397ffa6d977d22fac566c398641f9b24 Mon Sep 17 00:00:00 2001 From: Wenkai Du Date: Fri, 24 May 2019 20:58:51 +0000 Subject: [PATCH] Increase number of rings with XGMI connection Improve throughput by about 20%. Also disable P2P over PCIe, which was left enabled in the initial release. Signed-off-by: Wenkai Du [ROCm/rccl commit: f45566a8bd16352e4189b2a874dac2f522ae761f] --- projects/rccl/src/transport/p2p.cu | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/projects/rccl/src/transport/p2p.cu b/projects/rccl/src/transport/p2p.cu index a616ad5f6d..065e2a7bc7 100644 --- a/projects/rccl/src/transport/p2p.cu +++ b/projects/rccl/src/transport/p2p.cu @@ -119,9 +119,9 @@ ncclResult_t p2pCanConnect(ncclTvalue_t* ret, ncclTinfo_t* myOpaqueInfo, ncclTin link_status_print_once_mask |= (1 << (myInfo->cudaDev*8 + peerInfo->cudaDev)); } if (link_type != HSA_AMD_LINK_INFO_TYPE_XGMI) { - // enable below lines on release only: disable PCIe P2P until HDP flush is implemented. - // p2p = 0; - // return ncclSuccess; + // disable PCIe P2P until HDP flush is implemented. + p2p = 0; + return ncclSuccess; } int nvlinkp2p = 0; if (link_type == HSA_AMD_LINK_INFO_TYPE_XGMI && hops == 1) @@ -290,7 +290,11 @@ int p2pComputeRingsNvLink(ncclTvalue_t* values, int nranks, int* rings, int nrin } // Duplicate the rings for direct NVLink +#if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) + compNrings = copyRings(nranks, rings, compNrings, compNrings*3); +#else compNrings = copyRings(nranks, rings, compNrings, compNrings*2); +#endif if (ncclCudaCompCap() == 6) *nthreads /= 2; return compNrings;