Single-node AllReduce (AR) MSCCL algorithm tuning for MI300 (#1629)

This commit is contained in:
Pedram Alizadeh
2025-04-10 10:42:28 -04:00
committed by GitHub
parent b6d97a6176
commit 5b36b68d06
3 changed files with 2 additions and 11732 deletions
@@ -1,4 +1,4 @@
<algo name="allreduce_pairs" proto="Simple" nchannels="8" nchunksperloop="512" ngpus="8" coll="allreduce" inplace="0" outofplace="1" minBytes="524288" maxBytes="16777216">
<algo name="allreduce_pairs" proto="Simple" nchannels="8" nchunksperloop="512" ngpus="8" coll="allreduce" inplace="0" outofplace="1" minBytes="524288" maxBytes="20971520">
<gpu id="0" i_chunks="512" o_chunks="512" s_chunks="448">
<tb id="0" send="-1" recv="-1" chan="0">
<step s="0" type="cpy" srcbuf="i" srcoff="0" dstbuf="o" dstoff="0" cnt="8" depid="-1" deps="-1" hasdep="1"/>
@@ -1,4 +1,4 @@
<algo name="allreduce_pairs" proto="Simple" nchannels="8" nchunksperloop="512" ngpus="8" coll="allreduce" inplace="1" outofplace="0" minBytes="524288" maxBytes="11534336">
<algo name="allreduce_pairs" proto="Simple" nchannels="8" nchunksperloop="512" ngpus="8" coll="allreduce" inplace="1" outofplace="0" minBytes="524288" maxBytes="20971520">
<gpu id="0" i_chunks="512" o_chunks="0" s_chunks="448">
<tb id="0" send="-1" recv="-1" chan="0">
<step s="0" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="16" deps="1" hasdep="0"/>