diff --git a/CMakeLists.txt b/CMakeLists.txt index 09f0049b4a..070b3cb8bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -142,9 +142,6 @@ else() message(WARNING "Failed to extract ROCm version.") endif() -### Check for hipEventDisableSystemFence support -check_symbol_exists("hipEventDisableSystemFence" "hip/hip_runtime_api.h" HIP_EVENT_DISABLE_FENCE) - ### Check for hipDeviceMallocUncached support check_symbol_exists("hipDeviceMallocUncached" "hip/hip_runtime_api.h" HIP_UNCACHED_MEMORY) @@ -576,9 +573,6 @@ endif() if(TRACE) target_compile_definitions(rccl PRIVATE ENABLE_TRACE) endif() -if(${HIP_EVENT_DISABLE_FENCE}) - target_compile_definitions(rccl PRIVATE HIP_EVENT_DISABLE_FENCE) -endif() if(${HIP_CONTIGUOUS_MEMORY}) target_compile_definitions(rccl PRIVATE HIP_CONTIGUOUS_MEMORY) endif() diff --git a/src/init.cc b/src/init.cc index 41aae75558..5524490b0f 100644 --- a/src/init.cc +++ b/src/init.cc @@ -521,12 +521,7 @@ static ncclResult_t commAlloc(struct ncclComm* comm, struct ncclComm* parent, in // Try to create a CUDA object right away. If there is something wrong with // the device we're on (failure cause #1) , better know it early. hipEvent_t doneEvent; -#ifdef HIP_EVENT_DISABLE_FENCE - CUDACHECK(hipEventCreateWithFlags(&doneEvent, hipEventDisableTiming|hipEventDisableSystemFence)); -#else CUDACHECK(hipEventCreateWithFlags(&doneEvent, hipEventDisableTiming)); -#endif - comm->doneEvent = doneEvent; comm->lastStream = nullptr; diff --git a/src/misc/msccl/msccl_setup.cc b/src/misc/msccl/msccl_setup.cc index 88b379f988..e755f0f6a0 100644 --- a/src/misc/msccl/msccl_setup.cc +++ b/src/misc/msccl/msccl_setup.cc @@ -16,10 +16,6 @@ #include "msccl/msccl_setup.h" #include "msccl/msccl_status.h" -#ifndef HIP_EVENT_DISABLE_FENCE -RCCL_PARAM(MscclEnableDoneEvent, "MSCCL_ENABLE_DONE_EVENT", 1); -#endif - RCCL_PARAM(MscclWorkFifoDepth, "MSCCL_WORK_FIFO_DEPTH", 256<<10); static inline size_t computeSizeNeeded(size_t nBytes, int nScratchChunks, int nChunksPerLoop) { @@ -417,14 +413,7 @@ ncclResult_t mscclSetupKernel(const void* sendBuff, void* recvBuff, size_t count mscclStatus& status = mscclGetStatus(); mscclThreadLocalStatus& threadLocalStatus = mscclGetThreadLocalStatus(); - bool enableDoneEvent = -#ifndef HIP_EVENT_DISABLE_FENCE - (rcclParamMscclEnableDoneEvent() == 1); -#else - true; -#endif - - if (enableDoneEvent && (status.lastStream != stream && status.lastStream != nullptr)) { + if (status.lastStream != stream && status.lastStream != nullptr) { CUDACHECK(hipStreamWaitEvent(stream, comm->doneEvent, 0)); } @@ -526,11 +515,7 @@ ncclResult_t mscclSetupKernel(const void* sendBuff, void* recvBuff, size_t count void *args[3] = {&comm->devComm, &devAlgo, &workPtr}; void *func = mscclKernelEntries[fnIndex]; - if (enableDoneEvent) { - CUDACHECK(hipExtLaunchKernel(func, grid, block, args, 0, stream, NULL, comm->doneEvent, 0)); - } else { - CUDACHECK(hipExtLaunchKernel(func, grid, block, args, 0, stream, NULL, NULL, 0)); - } + CUDACHECK(hipExtLaunchKernel(func, grid, block, args, 0, stream, NULL, comm->doneEvent, 0)); status.workIndex++; status.lastStream = stream; return ncclSuccess;