From bed6070e1285446f410ca54cf7f7ce820d7d200f Mon Sep 17 00:00:00 2001 From: Pedram Alizadeh Date: Wed, 17 Dec 2025 16:58:54 -0500 Subject: [PATCH] Adding tuning conf file for CU reduction for AR, AG, and RS with under-subscribed number of GPUs per node (#2102) [ROCm/rccl commit: f0e7e8745f7f783c45d0501e1258fe3914a3d519] --- .../example/subset_gpus_nccl_cu_tuning.conf | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 projects/rccl/ext-tuner/example/subset_gpus_nccl_cu_tuning.conf diff --git a/projects/rccl/ext-tuner/example/subset_gpus_nccl_cu_tuning.conf b/projects/rccl/ext-tuner/example/subset_gpus_nccl_cu_tuning.conf new file mode 100644 index 0000000000..a079af3979 --- /dev/null +++ b/projects/rccl/ext-tuner/example/subset_gpus_nccl_cu_tuning.conf @@ -0,0 +1,53 @@ +# NCCL Tuner Configuration File (CSV Format) +# Format: collective_type,min_bytes,max_bytes,algorithm,protocol,channels,nNodes,nRanks,numPipeOps,regBuff +# +# Collective types: broadcast, reduce, allgather, reducescatter, allreduce +# Algorithms: tree, ring, collnet_direct, collnet_chain, nvls, nvls_tree, pat +# Protocols: ll, ll128, simple +# Channels: number of channels to use, or -1 to keep default +# nNodes: number of nodes to match, or -1 for any number of nodes +# nRanks: number of ranks to match, or -1 for any number of ranks +# numPipeOps: number of pipeline operations to match, or -1 for any number (optional) +# regBuff: whether user buffer can be registered (0=no, 1=yes, -1=any) (optional) +# +# Note: numPipeOps and regBuff parameters are optional - configurations without them will match any value +# +# allreduce (AR), 4 ranks per node (4PPN) +allreduce,33554432,4294967296,ring,simple,16,2,8,-1,-1 +allreduce,33554432,4294967296,ring,simple,16,4,16,-1,-1 +allreduce,67108864,4294967296,ring,simple,16,8,32,-1,-1 +# allreduce (AR), 2 ranks per node (2PPN) +allreduce,2097152,4294967296,ring,simple,4,2,4,-1,-1 +allreduce,16777216,4294967296,ring,simple,4,4,8,-1,-1 +allreduce,33554432,4294967296,ring,simple,4,8,16,-1,-1 +# allreduce (AR), 1 rank per node (1PPN) 
+allreduce,134217728,4294967296,ring,simple,4,4,4,-1,-1 +allreduce,67108864,4294967296,ring,simple,4,8,8,-1,-1 + + +# allgather (AG), 4 ranks per node (4PPN) +allgather,8388608,4294967296,ring,simple,16,2,8,-1,-1 +allgather,16777216,4294967296,ring,simple,16,4,16,-1,-1 +allgather,16777216,4294967296,ring,simple,16,8,32,-1,-1 +# allgather (AG), 2 ranks per node (2PPN) +allgather,262144,4294967296,ring,simple,4,2,4,-1,-1 +allgather,16777216,4294967296,ring,simple,4,4,8,-1,-1 +allgather,33554432,4294967296,ring,simple,4,8,16,-1,-1 +# allgather (AG), 1 rank per node (1PPN) +allgather,262144,2097152,ring,simple,4,2,2,-1,-1 +allgather,262144,8388608,ring,simple,4,4,4,-1,-1 +allgather,67108864,4294967296,ring,simple,4,8,8,-1,-1 + +# reducescatter (RS), 4 ranks per node (4PPN) +reducescatter,1048576,4294967296,ring,simple,16,2,8,-1,-1 +reducescatter,1048576,4294967296,ring,simple,16,4,16,-1,-1 +reducescatter,1048576,4294967296,ring,simple,16,8,32,-1,-1 +# reducescatter (RS), 2 ranks per node (2PPN) +reducescatter,262144,33554432,ring,simple,4,2,4,-1,-1 +reducescatter,262144,4294967296,ring,simple,4,4,8,-1,-1 +reducescatter,262144,4294967296,ring,simple,4,8,16,-1,-1 +# reducescatter (RS), 1 rank per node (1PPN) +reducescatter,131072,262144,ring,simple,4,2,2,-1,-1 +reducescatter,1048576,2097152,ring,simple,4,2,2,-1,-1 +reducescatter,131072,4194304,ring,simple,4,4,4,-1,-1 +reducescatter,262144,8388608,ring,simple,4,8,8,-1,-1 \ No newline at end of file