Merge pull request #122 from wenkaidu/tune_ll
Tune LL threshold for VEGA
This commit is contained in:
@@ -84,8 +84,8 @@ class ncclPrimitives {
|
||||
|
||||
__device__ int checkAbort(volatile uint64_t* remoteOpCount) {
|
||||
spins++;
|
||||
abort = LOAD(comm->abortFlag);
|
||||
if (spins == SPINS_BEFORE_CHECK_ABORT) {
|
||||
abort = LOAD(comm->abortFlag);
|
||||
checkMismatch(remoteOpCount);
|
||||
spins = 0;
|
||||
}
|
||||
@@ -404,8 +404,8 @@ class ncclLLPrimitives {
|
||||
|
||||
__device__ int checkAbort(volatile uint64_t* remoteOpCount) {
|
||||
spins++;
|
||||
abort = LOAD(comm->abortFlag);
|
||||
if (spins == SPINS_BEFORE_CHECK_ABORT) {
|
||||
abort = LOAD(comm->abortFlag);
|
||||
checkMismatch(remoteOpCount);
|
||||
spins = 0;
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
// Default tuning thresholds. ncclThreadThreshold() falls back to one of the NCCL_THREAD_THRESHOLD* values only when the user has not set the env variable, and doubles the result when running on multiple nodes.
#define NCCL_LL_CHANNEL_THRESHOLD 8 // Per thread size before we start increasing nrings
|
||||
#define NCCL_THREAD_THRESHOLD 256 // Per thread size before we switch to non-LL (non-HIP builds, chosen when minCompCap > 6)
|
||||
#define NCCL_THREAD_THRESHOLD_PREVOLTA 32 // Per thread size before we switch to non-LL for pre-Volta archs (non-HIP builds, chosen when minCompCap <= 6)
|
||||
#define NCCL_THREAD_THRESHOLD_VEGA 8 // Per thread size before we switch to non-LL for VEGA (chosen when __HIP_PLATFORM_HCC__ or __HCC__ is defined)
|
||||
#define NCCL_LL_MIN_NTHREADS 256 // NOTE(review): presumably the minimum thread count used for LL kernels; no use is visible here, confirm against callers
|
||||
|
||||
ncclResult_t ncclEnqueueCheck(struct ncclInfo* info);
|
||||
|
||||
@@ -150,7 +150,11 @@ NCCL_PARAM(TreeThreshold, "TREE_THRESHOLD", 0);
|
||||
int ncclThreadThreshold(int minCompCap, int multiNode) {
|
||||
int threshold = ncclParamThreadThreshold();
|
||||
if (threshold == -2) { // user has not set this env variable
|
||||
#if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__)
|
||||
threshold = NCCL_THREAD_THRESHOLD_VEGA;
|
||||
#else
|
||||
threshold = (minCompCap <= 6) ? NCCL_THREAD_THRESHOLD_PREVOLTA : NCCL_THREAD_THRESHOLD;
|
||||
#endif
|
||||
// multiply by 2 if running on multiple nodes
|
||||
if (multiNode) {
|
||||
threshold *= 2;
|
||||
|
||||
Reference in New Issue
Block a user