Detect HIP_UNCACHED_MEMORY support from HIP version (#842)

This commit is contained in:
Wenkai Du
2023-08-04 10:17:04 -07:00
committed by GitHub
parent 8e58b65873
commit d65c0830c6
3 changed files with 10 additions and 13 deletions
+4 -1
View File
@@ -542,7 +542,10 @@ if(${HIP_EVENT_DISABLE_FENCE})
target_compile_definitions(rccl PRIVATE HIP_EVENT_DISABLE_FENCE)
endif()
if(${HIP_UNCACHED_MEMORY})
target_compile_definitions(rccl PRIVATE HIP_UNCACHED_MEMORY)
if(${hipcc_version_string} VERSION_GREATER_EQUAL "5.7.31920")
target_compile_definitions(rccl PRIVATE HIP_UNCACHED_MEMORY)
message(STATUS "HIP_UNCACHED_MEMORY enabled")
endif()
endif()
if (BUILD_BFD)
if (HAVE_BFD)
+3 -9
View File
@@ -167,9 +167,7 @@ ncclResult_t ncclCudaMallocDebug(const char *filefunc, int line, T** ptr, size_t
CUDACHECK(cudaThreadExchangeStreamCaptureMode(&mode));
if (isFineGrain) {
#if defined(HIP_UNCACHED_MEMORY)
hipDeviceProp_t prop;
CUDACHECK(hipGetDeviceProperties(&prop, 0));
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained), result, finish);
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocUncached), result, finish);
#else
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocFinegrained), result, finish);
#endif
@@ -195,9 +193,7 @@ ncclResult_t ncclCudaCallocDebug(const char *filefunc, int line, T** ptr, size_t
CUDACHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
if (isFineGrain) {
#if defined(HIP_UNCACHED_MEMORY)
hipDeviceProp_t prop;
CUDACHECK(hipGetDeviceProperties(&prop, 0));
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained), result, finish);
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocUncached), result, finish);
#else
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocFinegrained), result, finish);
#endif
@@ -229,9 +225,7 @@ ncclResult_t ncclCudaCallocAsyncDebug(const char *filefunc, int line, T** ptr, s
CUDACHECK(cudaThreadExchangeStreamCaptureMode(&mode));
if (isFineGrain) {
#if defined(HIP_UNCACHED_MEMORY)
hipDeviceProp_t prop;
CUDACHECK(hipGetDeviceProperties(&prop, 0));
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained), result, finish);
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocUncached), result, finish);
#else
CUDACHECKGOTO(hipExtMallocWithFlags((void**)ptr, nelem*sizeof(T), hipDeviceMallocFinegrained), result, finish);
#endif
+3 -3
View File
@@ -124,9 +124,11 @@ static ncclResult_t ncclInit() {
if (strstr(strValue, "iommu=pt") == NULL)
WARN("Missing \"iommu=pt\" from kernel command line which can lead to system instablity or hang!");
}
#ifndef HIP_UNCACHED_MEMORY
char *env = getenv("HSA_FORCE_FINE_GRAIN_PCIE");
if (env == NULL || strcmp(env, "1") != 0)
WARN("Missing \"HSA_FORCE_FINE_GRAIN_PCIE=1\" from environment which can lead to low RCCL performance, system instablity or hang!");
#endif
}
#ifndef NVTX_NO_IMPL
initNvtxRegisteredEnums();
@@ -715,9 +717,7 @@ static ncclResult_t fillInfo(struct ncclComm* comm, struct ncclPeerInfo* info, u
// detect if fine grained memory is available on this GPU
int *ptr;
#if defined(HIP_UNCACHED_MEMORY)
hipDeviceProp_t prop;
CUDACHECK(hipGetDeviceProperties(&prop, 0));
if (hipExtMallocWithFlags((void**)&ptr, sizeof(int), prop.gcnArch/10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained) == hipSuccess) {
if (hipExtMallocWithFlags((void**)&ptr, sizeof(int), hipDeviceMallocUncached) == hipSuccess) {
#else
if (hipExtMallocWithFlags((void**)&ptr, sizeof(int), hipDeviceMallocFinegrained) == hipSuccess) {
#endif