Tuning: use constant value for CorrectionFactor tables
This commit is contained in:
کامیت شده توسط
Alexander Sannikov
والد
dea50b5e11
کامیت
50568dc93d
+11
-6
@@ -143,17 +143,25 @@ static const float baseLat [NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS] = {
|
||||
#define NCCL_HW_PCI 1
|
||||
#define NCCL_HW_NET 2
|
||||
|
||||
|
||||
|
||||
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__)
|
||||
#define RCCL_FACTOR_TABLE_SIZE 27
|
||||
#define RCCL_FACTOR_TABLE_MAX_INDEX (RCCL_FACTOR_TABLE_SIZE - 1)
|
||||
#endif
|
||||
struct tuningModel {
|
||||
float hwLat [3][NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
|
||||
float bwRatio [2][NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
|
||||
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__)
|
||||
float treeCorrectionFactor[NCCL_NUM_PROTOCOLS][RCCL_FACTOR_TABLE_SIZE];
|
||||
float ringCorrectionFactor[NCCL_NUM_PROTOCOLS][RCCL_FACTOR_TABLE_SIZE];
|
||||
#else
|
||||
float treeCorrectionFactor[NCCL_NUM_PROTOCOLS][27];
|
||||
float ringCorrectionFactor[NCCL_NUM_PROTOCOLS][27];
|
||||
#endif
|
||||
uint64_t llProtoRanges[RCCL_TUNABLE_COLLS][NCCL_NUM_PROTOCOLS - 1][RCCL_PROTOCOL_ENTRY_SIZE];
|
||||
uint64_t channelThresholds[RCCL_TUNABLE_COLLS][RCCL_CHANNELS_TUNABLE_ENTRIES][3]; //for each collective, set for 5 channel-counts: 2,4,8,16,32,40,48,56,64, {min,max,nchannels}
|
||||
};
|
||||
|
||||
|
||||
static struct tuningModel tuning_model_0 {
|
||||
.hwLat = {
|
||||
/* NVLINK */
|
||||
@@ -887,10 +895,7 @@ ncclResult_t ncclTopoGetAlgoTime(struct ncclComm* comm, int coll, int algorithm,
|
||||
}
|
||||
int logSize = log2i(nBytes>>6);
|
||||
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__)
|
||||
|
||||
if (logSize < 0) logSize = 0;
|
||||
if (logSize > 26) logSize = 26;
|
||||
|
||||
logSize = std::max(0, std::min(RCCL_FACTOR_TABLE_MAX_INDEX, logSize));
|
||||
if (algorithm == NCCL_ALGO_TREE) {
|
||||
bw *= rcclTuningModel[comm->topo->tuning].treeCorrectionFactor[protocol][logSize];
|
||||
}
|
||||
|
||||
مرجع در شماره جدید
Block a user