Check for fine grain support using memory allocation

Šī revīzija ir iekļauta:
Wenkai Du
2019-11-01 15:58:49 -07:00
vecāks 90b2921207
revīzija 669f1951a4
4 mainīti faili ar 14 papildinājumiem un 32 dzēšanām
+9
Parādīt failu
@@ -51,4 +51,13 @@ static ncclResult_t ncclCudaMemcpy(T* dst, T* src, size_t nelem) {
return ncclSuccess;
}
/*
 * Probe for fine-grained device memory support by attempting a tiny
 * allocation with the hipDeviceMallocFinegrained flag.
 *
 * Returns true when the sizeof(int) probe allocation succeeds (the probe
 * buffer is released before returning), false when the allocation fails.
 *
 * NOTE(review): CUDACHECK is defined outside this view; if it returns an
 * error code on failure, that value would be converted to this function's
 * bool return type -- confirm this is the intended behavior.
 */
static bool hasFineGrainVramPcie() {
  int *probe;
  if (hipExtMallocWithFlags((void**)&probe, sizeof(int), hipDeviceMallocFinegrained) != hipSuccess) {
    // The allocation was refused, so fine-grained memory is not available.
    return false;
  }
  // The allocation was only a capability probe; give the memory back.
  CUDACHECK(hipFree(probe));
  return true;
}
#endif
-21
Parādīt failu
@@ -163,25 +163,6 @@ int ncclThreadThreshold(int minCompCap, int multiNode) {
return threshold;
}
bool useFineGrainVramPcie = false;
/*
 * Read HSA_FORCE_FINE_GRAIN_PCIE from the environment and record it in the
 * global useFineGrainVramPcie.
 *
 * The value is accepted only when the entire string parses as the integer
 * 0 or 1. Anything else (no digits at all, trailing characters, a range
 * error, or another integer) is reported as invalid and leaves
 * useFineGrainVramPcie unchanged. An unset or empty variable is reported
 * and likewise leaves the flag unchanged.
 */
void parseHsaForceFineGrainVramPcie() {
  static const char envName[] = "HSA_FORCE_FINE_GRAIN_PCIE";

  const char* str = getenv(envName);
  if (str == NULL || str[0] == '\0') {
    INFO(NCCL_ALL,"%s not set by environment.", envName);
    return;
  }

  errno = 0;
  char* end = NULL;
  int64_t v = strtoll(str, &end, 0);
  // end == str   : no digits were consumed (e.g. "abc"), strtoll returned 0.
  // *end != '\0' : trailing characters follow the number (e.g. "1xyz").
  // Without these checks such input would be silently treated as 0 or 1.
  if (errno || end == str || *end != '\0' || (v != 0 && v != 1)) {
    INFO(NCCL_ALL,"Invalid value %s for %s, using default %u.", str, envName, useFineGrainVramPcie);
    return;
  }

  useFineGrainVramPcie = v;
  INFO(NCCL_ALL,"%s set by environment to %u.", envName, useFineGrainVramPcie);
}
pthread_mutex_t initLock = PTHREAD_MUTEX_INITIALIZER;
static bool initialized = false;
static ncclResult_t ncclInit() {
@@ -193,8 +174,6 @@ static ncclResult_t ncclInit() {
initNet();
initialized = true;
}
// Check if HSA_FORCE_FINE_GRAIN_PCIE is set in env
parseHsaForceFineGrainVramPcie();
pthread_mutex_unlock(&initLock);
return ncclSuccess;
}
+1 -3
Parādīt failu
@@ -245,8 +245,6 @@ end:
return dev;
}
extern bool useFineGrainVramPcie;
NCCL_PARAM(NetGdrRead, "NET_GDR_READ", -2);
NCCL_PARAM(NetGdrLevel, "NET_GDR_LEVEL", PATH_PHB);
@@ -257,7 +255,7 @@ static ncclResult_t netGetGdrSupport(int dev, int read, int* useGdr) {
CUDACHECK(hipGetDevice(&cudaDev));
NCCLCHECK(getNvmlDevice(cudaDev, &nvmlDev))
if (!useFineGrainVramPcie) {
if (!hasFineGrainVramPcie()) {
INFO(NCCL_INIT|NCCL_NET,"NET/%s : GPU Direct RDMA Disabled for GPU %d / Need Fine Grain VRAM over PCIe", ncclNetName(), cudaDev);
return ncclSuccess;
}
+4 -8
Parādīt failu
@@ -58,8 +58,6 @@ struct p2pRecvResources {
NCCL_PARAM(P2pLevel, "P2P_LEVEL", -2);
NCCL_PARAM(P2pDisable, "P2P_DISABLE", -2);
extern bool useFineGrainVramPcie;
/* Convert a PCI busId string into a local cudaDev device index (cf. CUDA_VISIBLE_DEVICES) */
static int busIdToCudaDev(const char* busId) {
int ndev;
@@ -86,6 +84,10 @@ ncclResult_t p2pCanConnect(ncclTvalue_t* ret, struct ncclPeerInfo* myInfo, struc
*ret = 0;
#if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__)
if (!hasFineGrainVramPcie()) return ncclSuccess;
#endif
if (p2pLevel == 0) return ncclSuccess;
// Rule out different nodes
@@ -112,9 +114,6 @@ ncclResult_t p2pCanConnect(ncclTvalue_t* ret, struct ncclPeerInfo* myInfo, struc
// Do not detect topology if we're on the same GPU. Note this is not really supported.
if (myInfo->cudaDev == peerCudaDev) {
#if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__)
if (!useFineGrainVramPcie) return ncclSuccess;
#endif
*ret = 1 + PATH_SYS;
return ncclSuccess;
}
@@ -145,9 +144,6 @@ ncclResult_t p2pCanConnect(ncclTvalue_t* ret, struct ncclPeerInfo* myInfo, struc
if (link_type == HSA_AMD_LINK_INFO_TYPE_XGMI) {
if (hops == 1)
nvlinkp2p = CONNECT_NVLINK;
} else {
if (!useFineGrainVramPcie)
return ncclSuccess;
}
#else
// Check for NVLink/NVswitch