Revert disabling of context tracking for Radeon (#1927)
* Revert disabling of context tracking for Radeon. Original commit: df3b7e47 `Disable context tracking for the current version. (#1839)` * Add env variable for disabling of context tracking for Radeon: `export NCCL_DISABLE_CONTEXT_TRACKING=1` to force-disable context tracking * Update docs/how-to/rccl-usage-tips.rst Fix grammar, thanks @amd-jnovotny Co-authored-by: Jeffrey Novotny <jnovotny@amd.com> * Rename NCCL_DISABLE_CONTEXT_TRACKING -> RCCL_DISABLE_CONTEXT_TRACKING * Revert changes in includes and rename util function --------- Co-authored-by: Jeffrey Novotny <jnovotny@amd.com> [ROCm/rccl commit:07925ec027]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
6423f5b024
Коммит
722b0cd579
@@ -253,4 +253,13 @@ set during the benchmark in the following manner:
|
||||
|
||||
The default allreduce PyTorch benchmark peak bus bandwidth performance is
|
||||
~170 GB/s on a single OAM with ROCm 6.2.4, while the optimized run for CPX on a
|
||||
single OAM peaks at ~315 GB/s.
|
||||
single OAM peaks at ~315 GB/s.
|
||||
|
||||
Context tracking on Radeon GPUs
|
||||
----------------------------------------
|
||||
Context tracking is disabled on the AMD Instinct™ series of GPUs for better performance but is enabled for Radeon GPUs.
|
||||
To disable context tracking for Radeon GPUs, set the following environment variable:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
export RCCL_DISABLE_CONTEXT_TRACKING=1
|
||||
@@ -68,6 +68,8 @@ inline ncclResult_t getRandomData(void* buffer, size_t bytes) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Reports whether context tracking should be enabled for device `cuDeviceId`.
// Callers use the result to gate ncclCudaContextTrack() when a communicator is
// allocated and ncclCudaContextDrop() when it is freed.
bool rcclNeedEnableContextTrack(int cuDeviceId);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<typename Int>
|
||||
|
||||
@@ -531,7 +531,12 @@ static ncclResult_t commFree(ncclComm_t comm) {
|
||||
NCCLCHECK(ncclProfilerPluginFinalize(comm));
|
||||
NCCLCHECK(ncclNetFinalize(comm));
|
||||
// Disable until we validate NCCL_LAUNCH_IMPLICIT_ORDER support.
|
||||
//ncclCudaContextDrop(comm->context);
|
||||
// but enable for Radeon due to big impact on performance
|
||||
if (rcclNeedEnableContextTrack(comm->cudaDev)) {
|
||||
ncclCudaContextDrop(comm->context);
|
||||
INFO(NCCL_INIT, "cudaDev %d context tracking destroyed", comm->cudaDev);
|
||||
}
|
||||
|
||||
free(comm);
|
||||
|
||||
return ncclSuccess;
|
||||
@@ -627,7 +632,11 @@ static ncclResult_t commAlloc(struct ncclComm* comm, struct ncclComm* parent, in
|
||||
CUDACHECK(cudaGetDevice(&comm->cudaDev));
|
||||
|
||||
// Disable until we validate NCCL_LAUNCH_IMPLICIT_ORDER support.
|
||||
//NCCLCHECK(ncclCudaContextTrack(&comm->context));
|
||||
// but enable for Radeon due to big impact on performance
|
||||
if (rcclNeedEnableContextTrack(comm->cudaDev)) {
|
||||
NCCLCHECK(ncclCudaContextTrack(&comm->context));
|
||||
INFO(NCCL_INIT, "cudaDev %d context tracking created", comm->cudaDev);
|
||||
}
|
||||
|
||||
NCCLCHECK(getBusId(comm->cudaDev, &comm->busId));
|
||||
char busId[]="0000:00:00.0";
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
#include "core.h"
|
||||
|
||||
#include "nvmlwrap.h"
|
||||
#include "archinfo.h"
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
@@ -191,6 +193,18 @@ bool matchIfList(const char* string, int port, struct netIf* ifList, int listSiz
|
||||
return false;
|
||||
}
|
||||
|
||||
// Runtime switch queried through rcclParamDisableContextTracking(). Setting the
// RCCL_DISABLE_CONTEXT_TRACKING environment variable to 1 forces context
// tracking off.
// NOTE(review): the trailing 0 is presumably the default value - confirm
// against the RCCL_PARAM macro definition.
RCCL_PARAM(DisableContextTracking, "DISABLE_CONTEXT_TRACKING", 0);
|
||||
bool rcclNeedEnableContextTrack(int cuDeviceId) {
|
||||
hipDeviceProp_t devProp;
|
||||
if (rcclParamDisableContextTracking() == 1)
|
||||
return false;
|
||||
if (hipGetDeviceProperties(&devProp, cuDeviceId) != 0)
|
||||
return false;
|
||||
return IsArchMatch(devProp.gcnArchName,"gfx11")
|
||||
|| IsArchMatch(devProp.gcnArchName,"gfx12")
|
||||
|| IsArchMatch(devProp.gcnArchName,"gfx10");
|
||||
}
|
||||
|
||||
__thread struct ncclThreadSignal ncclThreadSignalLocalInstance = ncclThreadSignalStaticInitializer();
|
||||
|
||||
void* ncclMemoryStack::allocateSpilled(struct ncclMemoryStack* me, size_t size, size_t align) {
|
||||
|
||||
Ссылка в новой задаче
Block a user