From d65c0830c65e758a5dd57469192965104f1f1d82 Mon Sep 17 00:00:00 2001 From: Wenkai Du <43822138+wenkaidu@users.noreply.github.com> Date: Fri, 4 Aug 2023 10:17:04 -0700 Subject: [PATCH] Detect HIP_UNCACHED_MEMORY support from HIP version (#842) --- CMakeLists.txt | 5 ++++- src/include/alloc.h | 12 +++--------- src/init.cc | 6 +++--- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dcce63ccde..4142cd09cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -542,7 +542,10 @@ if(${HIP_EVENT_DISABLE_FENCE}) target_compile_definitions(rccl PRIVATE HIP_EVENT_DISABLE_FENCE) endif() if(${HIP_UNCACHED_MEMORY}) - target_compile_definitions(rccl PRIVATE HIP_UNCACHED_MEMORY) + if(${hipcc_version_string} VERSION_GREATER_EQUAL "5.7.31920") + target_compile_definitions(rccl PRIVATE HIP_UNCACHED_MEMORY) + message(STATUS "HIP_UNCACHED_MEMORY enabled") + endif() endif() if (BUILD_BFD) if (HAVE_BFD) diff --git a/src/include/alloc.h b/src/include/alloc.h index 9579044bf3..27a82fea82 100644 --- a/src/include/alloc.h +++ b/src/include/alloc.h @@ -167,9 +167,7 @@ ncclResult_t ncclCudaMallocDebug(const char *filefunc, int line, T** ptr, size_t CUDACHECK(cudaThreadExchangeStreamCaptureMode(&mode)); if (isFineGrain) { #if defined(HIP_UNCACHED_MEMORY) - hipDeviceProp_t prop; - CUDACHECK(hipGetDeviceProperties(&prop, 0)); - CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained), result, finish); + CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocUncached), result, finish); #else CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocFinegrained), result, finish); #endif @@ -195,9 +193,7 @@ ncclResult_t ncclCudaCallocDebug(const char *filefunc, int line, T** ptr, size_t CUDACHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); if (isFineGrain) { #if defined(HIP_UNCACHED_MEMORY) - hipDeviceProp_t prop; - CUDACHECK(hipGetDeviceProperties(&prop, 0)); - CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained), result, finish); + CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocUncached), result, finish); #else CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocFinegrained), result, finish); #endif @@ -229,9 +225,7 @@ ncclResult_t ncclCudaCallocAsyncDebug(const char *filefunc, int line, T** ptr, s CUDACHECK(cudaThreadExchangeStreamCaptureMode(&mode)); if (isFineGrain) { #if defined(HIP_UNCACHED_MEMORY) - hipDeviceProp_t prop; - CUDACHECK(hipGetDeviceProperties(&prop, 0)); - CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained), result, finish); + CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocUncached), result, finish); #else CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocFinegrained), result, finish); #endif diff --git a/src/init.cc b/src/init.cc index ac3a38f43c..c9bef33db2 100644 --- a/src/init.cc +++ b/src/init.cc @@ -124,9 +124,11 @@ static ncclResult_t ncclInit() { if (strstr(strValue, "iommu=pt") == NULL) WARN("Missing \"iommu=pt\" from kernel command line which can lead to system instablity or hang!"); } +#ifndef HIP_UNCACHED_MEMORY char *env = getenv("HSA_FORCE_FINE_GRAIN_PCIE"); if (env == NULL || strcmp(env, "1") != 0) WARN("Missing \"HSA_FORCE_FINE_GRAIN_PCIE=1\" from environment which can lead to low RCCL performance, system instablity or hang!"); +#endif } #ifndef NVTX_NO_IMPL initNvtxRegisteredEnums(); @@ -715,9 +717,7 @@ static ncclResult_t fillInfo(struct ncclComm* comm, struct ncclPeerInfo* info, u // detect if fine grained memory is available on this GPU int *ptr; #if defined(HIP_UNCACHED_MEMORY) - hipDeviceProp_t prop; - CUDACHECK(hipGetDeviceProperties(&prop, 0)); - if (hipExtMallocWithFlags((void**)&ptr, sizeof(int), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained) == hipSuccess) { + if (hipExtMallocWithFlags((void**)&ptr, sizeof(int), hipDeviceMallocUncached) == hipSuccess) { #else if (hipExtMallocWithFlags((void**)&ptr, sizeof(int), hipDeviceMallocFinegrained) == hipSuccess) { #endif