Detect HIP_UNCACHED_MEMORY support from HIP version (#842)
This commit is contained in:
+4
-1
@@ -542,7 +542,10 @@ if(${HIP_EVENT_DISABLE_FENCE})
|
||||
target_compile_definitions(rccl PRIVATE HIP_EVENT_DISABLE_FENCE)
|
||||
endif()
|
||||
if(${HIP_UNCACHED_MEMORY})
|
||||
target_compile_definitions(rccl PRIVATE HIP_UNCACHED_MEMORY)
|
||||
if(${hipcc_version_string} VERSION_GREATER_EQUAL "5.7.31920")
|
||||
target_compile_definitions(rccl PRIVATE HIP_UNCACHED_MEMORY)
|
||||
message(STATUS "HIP_UNCACHED_MEMORY enabled")
|
||||
endif()
|
||||
endif()
|
||||
if (BUILD_BFD)
|
||||
if (HAVE_BFD)
|
||||
|
||||
+3
-9
@@ -167,9 +167,7 @@ ncclResult_t ncclCudaMallocDebug(const char *filefunc, int line, T** ptr, size_t
|
||||
CUDACHECK(cudaThreadExchangeStreamCaptureMode(&mode));
|
||||
if (isFineGrain) {
|
||||
#if defined(HIP_UNCACHED_MEMORY)
|
||||
hipDeviceProp_t prop;
|
||||
CUDACHECK(hipGetDeviceProperties(&prop, 0));
|
||||
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained), result, finish);
|
||||
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocUncached), result, finish);
|
||||
#else
|
||||
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocFinegrained), result, finish);
|
||||
#endif
|
||||
@@ -195,9 +193,7 @@ ncclResult_t ncclCudaCallocDebug(const char *filefunc, int line, T** ptr, size_t
|
||||
CUDACHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
|
||||
if (isFineGrain) {
|
||||
#if defined(HIP_UNCACHED_MEMORY)
|
||||
hipDeviceProp_t prop;
|
||||
CUDACHECK(hipGetDeviceProperties(&prop, 0));
|
||||
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained), result, finish);
|
||||
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocUncached), result, finish);
|
||||
#else
|
||||
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocFinegrained), result, finish);
|
||||
#endif
|
||||
@@ -229,9 +225,7 @@ ncclResult_t ncclCudaCallocAsyncDebug(const char *filefunc, int line, T** ptr, s
|
||||
CUDACHECK(cudaThreadExchangeStreamCaptureMode(&mode));
|
||||
if (isFineGrain) {
|
||||
#if defined(HIP_UNCACHED_MEMORY)
|
||||
hipDeviceProp_t prop;
|
||||
CUDACHECK(hipGetDeviceProperties(&prop, 0));
|
||||
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained), result, finish);
|
||||
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocUncached), result, finish);
|
||||
#else
|
||||
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocFinegrained), result, finish);
|
||||
#endif
|
||||
|
||||
+3
-3
@@ -124,9 +124,11 @@ static ncclResult_t ncclInit() {
|
||||
if (strstr(strValue, "iommu=pt") == NULL)
|
||||
WARN("Missing \"iommu=pt\" from kernel command line which can lead to system instablity or hang!");
|
||||
}
|
||||
#ifndef HIP_UNCACHED_MEMORY
|
||||
char *env = getenv("HSA_FORCE_FINE_GRAIN_PCIE");
|
||||
if (env == NULL || strcmp(env, "1") != 0)
|
||||
WARN("Missing \"HSA_FORCE_FINE_GRAIN_PCIE=1\" from environment which can lead to low RCCL performance, system instablity or hang!");
|
||||
#endif
|
||||
}
|
||||
#ifndef NVTX_NO_IMPL
|
||||
initNvtxRegisteredEnums();
|
||||
@@ -715,9 +717,7 @@ static ncclResult_t fillInfo(struct ncclComm* comm, struct ncclPeerInfo* info, u
|
||||
// detect if fine grained memory is available on this GPU
|
||||
int *ptr;
|
||||
#if defined(HIP_UNCACHED_MEMORY)
|
||||
hipDeviceProp_t prop;
|
||||
CUDACHECK(hipGetDeviceProperties(&prop, 0));
|
||||
if (hipExtMallocWithFlags((void**)&ptr, sizeof(int), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained) == hipSuccess) {
|
||||
if (hipExtMallocWithFlags((void**)&ptr, sizeof(int), hipDeviceMallocUncached) == hipSuccess) {
|
||||
#else
|
||||
if (hipExtMallocWithFlags((void**)&ptr, sizeof(int), hipDeviceMallocFinegrained) == hipSuccess) {
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user