diff --git a/src/collectives/device/common.h b/src/collectives/device/common.h index 4acd171f43..002f189acc 100644 --- a/src/collectives/device/common.h +++ b/src/collectives/device/common.h @@ -476,8 +476,8 @@ __forceinline__ __device__ void ncclKernel( int y = __popcll(channelMask & ((1ull<nodes[GPU].nodes[0].gpu.gcn, "gfx94") && - system->nodes[GPU].count == 8 && system->nodes[NET].count == 8)) { - remove = 0; - system->type |= RCCL_TOPO_FORCE_INTRA; - } comm->localRanks = system->nodes[GPU].count; if (system->nodes[GPU].count == comm->nRanks && remove) { for (int n=system->nodes[NET].count-1; n>=0; n--) diff --git a/src/graph/rome_models.cc b/src/graph/rome_models.cc index 6f47e8fac1..4fbd4e39e8 100644 --- a/src/graph/rome_models.cc +++ b/src/graph/rome_models.cc @@ -551,7 +551,7 @@ static struct rcclRomeModel rome_model_79 = { .connMatrix = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, }, .gdrLevel = { }, .pattern = "4040", - .ringBase = "0 1 2 3 4 5 6 7|0 1 2 3 4 5 7 6|0 2 4 1 3 6 5 7|0 2 4 6 1 7 3 5|0 3 1 5 2 7 4 6|0 3 5 1 6 2 7 4|0 4 1 7 3 6 2 5|7 6 5 4 3 2 1 0|6 7 5 4 3 2 1 0|7 5 6 3 1 4 2 0|5 3 7 1 6 4 2 0|6 4 7 2 5 1 3 0|4 7 2 6 1 5 3 0|5 2 6 3 7 1 4 0", + .ringBase = "0 1 2 3 4 5 6 7|0 1 2 3 4 5 7 6|0 2 4 1 3 6 5 7|0 2 4 6 1 7 3 5|0 3 1 5 2 7 4 6|0 3 5 1 6 2 7 4|0 4 1 7 3 6 2 5|7 6 5 4 3 2 1 0|6 7 5 4 3 2 1 0|7 5 6 3 1 4 2 0|5 3 7 1 6 4 2 0|6 4 7 2 5 1 3 0|4 7 2 6 1 5 3 0|5 2 6 3 7 1 4 0|0 1 2 3 4 5 6 7|0 1 2 3 4 5 7 6|0 2 4 1 3 6 5 7|0 2 4 6 1 7 3 5|0 3 1 5 2 7 4 6|0 3 5 1 6 2 7 4|0 4 1 7 3 6 2 5|7 6 5 4 3 2 1 0|6 7 5 4 3 2 1 0|7 5 6 3 1 4 2 0|5 3 7 1 6 4 2 0|6 4 7 2 5 1 3 0|4 7 2 6 1 5 3 0|5 2 6 3 7 1 4 0", .options = "noCpuCheck=1,mscclEnabled=1", }; diff --git a/src/include/devcomm.h b/src/include/devcomm.h index d9968b34bf..dabd33f638 100644 --- a/src/include/devcomm.h +++ b/src/include/devcomm.h @@ -55,7 +55,7 @@ union ncclLLFifoLine { }; #define WARP_SIZE warpSize -#define MAXCHANNELS 32 +#define MAXCHANNELS 64 #define NCCL_MAX_NTHREADS 256 #define NCCL_SIMPLE_MAX_NTHREADS NCCL_MAX_NTHREADS #define NCCL_LL_MAX_NTHREADS NCCL_MAX_NTHREADS