Merge pull request #122 from wenkaidu/tune_ll

Tune LL threshold for VEGA
This commit is contained in:
Wenkai Du
2019-08-15 10:33:17 -07:00
committed by GitHub
3 files changed, with 7 additions and 2 deletions
@@ -84,8 +84,8 @@ class ncclPrimitives {
__device__ int checkAbort(volatile uint64_t* remoteOpCount) {
spins++;
abort = LOAD(comm->abortFlag);
if (spins == SPINS_BEFORE_CHECK_ABORT) {
abort = LOAD(comm->abortFlag);
checkMismatch(remoteOpCount);
spins = 0;
}
@@ -404,8 +404,8 @@ class ncclLLPrimitives {
__device__ int checkAbort(volatile uint64_t* remoteOpCount) {
spins++;
abort = LOAD(comm->abortFlag);
if (spins == SPINS_BEFORE_CHECK_ABORT) {
abort = LOAD(comm->abortFlag);
checkMismatch(remoteOpCount);
spins = 0;
}
+1
View file
@@ -15,6 +15,7 @@
#define NCCL_LL_CHANNEL_THRESHOLD 8 // Per thread size before we start increasing nrings
#define NCCL_THREAD_THRESHOLD 256 // Per thread size before we switch to non-LL; default in non-HIP builds when minCompCap > 6 (doubled for multi-node)
#define NCCL_THREAD_THRESHOLD_PREVOLTA 32 // Per thread size before we switch to non-LL for pre-Volta archs; default in non-HIP builds when minCompCap <= 6 (doubled for multi-node)
#define NCCL_THREAD_THRESHOLD_VEGA 8 // Per thread size before we switch to non-LL for VEGA; default when __HIP_PLATFORM_HCC__ or __HCC__ is defined (doubled for multi-node)
#define NCCL_LL_MIN_NTHREADS 256 // NOTE(review): presumably the minimum thread count used for the LL path -- not used in this view, confirm against its users
ncclResult_t ncclEnqueueCheck(struct ncclInfo* info);
+4
View file
@@ -150,7 +150,11 @@ NCCL_PARAM(TreeThreshold, "TREE_THRESHOLD", 0);
int ncclThreadThreshold(int minCompCap, int multiNode) {
int threshold = ncclParamThreadThreshold();
if (threshold == -2) { // user has not set this env variable
#if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__)
threshold = NCCL_THREAD_THRESHOLD_VEGA;
#else
threshold = (minCompCap <= 6) ? NCCL_THREAD_THRESHOLD_PREVOLTA : NCCL_THREAD_THRESHOLD;
#endif
// multiply by 2 if running on multiple nodes
if (multiNode) {
threshold *= 2;