Tuning: use constant value for CorrectionFactor tables

This commit is contained in:
alexander-sannikov
2025-12-08 18:32:08 +00:00
کامیت شده توسط Alexander Sannikov
والد dea50b5e11
کامیت 50568dc93d
+11 -6
مشاهده پرونده
@@ -143,17 +143,25 @@ static const float baseLat [NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS] = {
#define NCCL_HW_PCI 1
#define NCCL_HW_NET 2
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__)
#define RCCL_FACTOR_TABLE_SIZE 27
#define RCCL_FACTOR_TABLE_MAX_INDEX (RCCL_FACTOR_TABLE_SIZE - 1)
#endif
struct tuningModel {
float hwLat [3][NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
float bwRatio [2][NCCL_NUM_ALGORITHMS][NCCL_NUM_PROTOCOLS];
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__)
float treeCorrectionFactor[NCCL_NUM_PROTOCOLS][RCCL_FACTOR_TABLE_SIZE];
float ringCorrectionFactor[NCCL_NUM_PROTOCOLS][RCCL_FACTOR_TABLE_SIZE];
#else
float treeCorrectionFactor[NCCL_NUM_PROTOCOLS][27];
float ringCorrectionFactor[NCCL_NUM_PROTOCOLS][27];
#endif
uint64_t llProtoRanges[RCCL_TUNABLE_COLLS][NCCL_NUM_PROTOCOLS - 1][RCCL_PROTOCOL_ENTRY_SIZE];
uint64_t channelThresholds[RCCL_TUNABLE_COLLS][RCCL_CHANNELS_TUNABLE_ENTRIES][3]; //for each collective, set for 5 channel-counts: 2,4,8,16,32,40,48,56,64, {min,max,nchannels}
};
static struct tuningModel tuning_model_0 {
.hwLat = {
/* NVLINK */
@@ -887,10 +895,7 @@ ncclResult_t ncclTopoGetAlgoTime(struct ncclComm* comm, int coll, int algorithm,
}
int logSize = log2i(nBytes>>6);
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__)
if (logSize < 0) logSize = 0;
if (logSize > 26) logSize = 26;
logSize = std::max(0, std::min(RCCL_FACTOR_TABLE_MAX_INDEX, logSize));
if (algorithm == NCCL_ALGO_TREE) {
bw *= rcclTuningModel[comm->topo->tuning].treeCorrectionFactor[protocol][logSize];
}