From 67c8e72ce331d2ad3b5c5885e6415018e9c5ea13 Mon Sep 17 00:00:00 2001 From: Wenkai Du Date: Fri, 22 May 2020 17:15:21 +0000 Subject: [PATCH] Use cached value for detecting GDR support only once --- src/include/net.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/include/net.h b/src/include/net.h index a6ac5ba327..29afc41b78 100644 --- a/src/include/net.h +++ b/src/include/net.h @@ -36,6 +36,14 @@ static ncclResult_t ncclNetCloseListen(void* listenComm) { NCCLCHECK(ncclNet->cl static ncclResult_t ncclGpuGdrSupport(int* gdrSupport) { int netDevs; NCCLCHECK(ncclNetDevices(&netDevs)); + pthread_mutex_t ncclParamMutexGpuGdrSupport = PTHREAD_MUTEX_INITIALIZER; + static int gdrSupportCached[16] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; + int cudaDev; + CUDACHECK(hipGetDevice(&cudaDev)); + if (gdrSupportCached[cudaDev] != -1) { + *gdrSupport = gdrSupportCached[cudaDev]; + return ncclSuccess; + } *gdrSupport = 0; for (int dev=0; dev