Check for fine grain support using memory allocation
Šī revīzija ir iekļauta:
@@ -51,4 +51,13 @@ static ncclResult_t ncclCudaMemcpy(T* dst, T* src, size_t nelem) {
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
static bool hasFineGrainVramPcie() {
|
||||
int *ptr;
|
||||
if (hipExtMallocWithFlags((void**)&ptr, sizeof(int), hipDeviceMallocFinegrained) == hipSuccess) {
|
||||
CUDACHECK(hipFree(ptr));
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -163,25 +163,6 @@ int ncclThreadThreshold(int minCompCap, int multiNode) {
|
||||
return threshold;
|
||||
}
|
||||
|
||||
bool useFineGrainVramPcie = false;
|
||||
|
||||
void parseHsaForceFineGrainVramPcie() {
|
||||
char* str = getenv("HSA_FORCE_FINE_GRAIN_PCIE");
|
||||
if (str && strlen(str) > 0) {
|
||||
errno = 0;
|
||||
int64_t v = strtoll(str, NULL, 0);
|
||||
if (errno || (v != 0 && v != 1)) {
|
||||
INFO(NCCL_ALL,"Invalid value %s for %s, using default %u.", str, "HSA_FORCE_FINE_GRAIN_PCIE", useFineGrainVramPcie);
|
||||
} else {
|
||||
useFineGrainVramPcie = v;
|
||||
INFO(NCCL_ALL,"%s set by environment to %u.", "HSA_FORCE_FINE_GRAIN_PCIE", useFineGrainVramPcie);
|
||||
}
|
||||
}
|
||||
else {
|
||||
INFO(NCCL_ALL,"%s not set by environment.", "HSA_FORCE_FINE_GRAIN_PCIE");
|
||||
}
|
||||
}
|
||||
|
||||
// Mutex associated with library initialization. ncclInit() releases it just
// before returning; the matching lock call is outside the visible portion.
pthread_mutex_t initLock = PTHREAD_MUTEX_INITIALIZER;
|
||||
// Set to true by ncclInit() after initNet() has run. Presumably used to skip
// repeated initialization -- the check itself is not visible here.
static bool initialized = false;
|
||||
static ncclResult_t ncclInit() {
|
||||
@@ -193,8 +174,6 @@ static ncclResult_t ncclInit() {
|
||||
initNet();
|
||||
initialized = true;
|
||||
}
|
||||
// Check if HSA_FORCE_FINE_GRAIN_PCIE is set in env
|
||||
parseHsaForceFineGrainVramPcie();
|
||||
pthread_mutex_unlock(&initLock);
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
@@ -245,8 +245,6 @@ end:
|
||||
return dev;
|
||||
}
|
||||
|
||||
extern bool useFineGrainVramPcie;
|
||||
|
||||
NCCL_PARAM(NetGdrRead, "NET_GDR_READ", -2);
|
||||
NCCL_PARAM(NetGdrLevel, "NET_GDR_LEVEL", PATH_PHB);
|
||||
|
||||
@@ -257,7 +255,7 @@ static ncclResult_t netGetGdrSupport(int dev, int read, int* useGdr) {
|
||||
CUDACHECK(hipGetDevice(&cudaDev));
|
||||
NCCLCHECK(getNvmlDevice(cudaDev, &nvmlDev))
|
||||
|
||||
if (!useFineGrainVramPcie) {
|
||||
if (!hasFineGrainVramPcie()) {
|
||||
INFO(NCCL_INIT|NCCL_NET,"NET/%s : GPU Direct RDMA Disabled for GPU %d / Need Fine Grain VRAM over PCIe", ncclNetName(), cudaDev);
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
@@ -58,8 +58,6 @@ struct p2pRecvResources {
|
||||
NCCL_PARAM(P2pLevel, "P2P_LEVEL", -2);
|
||||
NCCL_PARAM(P2pDisable, "P2P_DISABLE", -2);
|
||||
|
||||
extern bool useFineGrainVramPcie;
|
||||
|
||||
/* Convert a PCI busId string into a local cudaDev device index (cf. CUDA_VISIBLE_DEVICES) */
|
||||
static int busIdToCudaDev(const char* busId) {
|
||||
int ndev;
|
||||
@@ -86,6 +84,10 @@ ncclResult_t p2pCanConnect(ncclTvalue_t* ret, struct ncclPeerInfo* myInfo, struc
|
||||
|
||||
*ret = 0;
|
||||
|
||||
#if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__)
|
||||
if (!hasFineGrainVramPcie()) return ncclSuccess;
|
||||
#endif
|
||||
|
||||
if (p2pLevel == 0) return ncclSuccess;
|
||||
|
||||
// Rule out different nodes
|
||||
@@ -112,9 +114,6 @@ ncclResult_t p2pCanConnect(ncclTvalue_t* ret, struct ncclPeerInfo* myInfo, struc
|
||||
|
||||
// Do not detect topology if we're on the same GPU. Note this is not really supported.
|
||||
if (myInfo->cudaDev == peerCudaDev) {
|
||||
#if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__)
|
||||
if (!useFineGrainVramPcie) return ncclSuccess;
|
||||
#endif
|
||||
*ret = 1 + PATH_SYS;
|
||||
return ncclSuccess;
|
||||
}
|
||||
@@ -145,9 +144,6 @@ ncclResult_t p2pCanConnect(ncclTvalue_t* ret, struct ncclPeerInfo* myInfo, struc
|
||||
if (link_type == HSA_AMD_LINK_INFO_TYPE_XGMI) {
|
||||
if (hops == 1)
|
||||
nvlinkp2p = CONNECT_NVLINK;
|
||||
} else {
|
||||
if (!useFineGrainVramPcie)
|
||||
return ncclSuccess;
|
||||
}
|
||||
#else
|
||||
// Check for NVLink/NVswitch
|
||||
|
||||
Atsaukties uz šo jaunā problēmā
Block a user