msccl algorithms tuning for allreduce on MI300 (#1088)
[ROCm/rccl commit: 5a0f9990a9]
This commit is contained in:
committed by
GitHub
parent
3cd03179cb
commit
bf48d1bc4d
@@ -1,4 +1,4 @@
|
||||
<algo name="allreduce_pairs" proto="LL" nchannels="4" nchunksperloop="32" ngpus="8" coll="allreduce" inplace="1" outofplace="0" minBytes="0" maxBytes="20480">
|
||||
<algo name="allreduce_pairs" proto="LL" nchannels="4" nchunksperloop="32" ngpus="8" coll="allreduce" inplace="1" outofplace="0" minBytes="0" maxBytes="25599">
|
||||
<gpu id="0" i_chunks="32" o_chunks="0" s_chunks="224">
|
||||
<tb id="0" send="-1" recv="-1" chan="0">
|
||||
<step s="0" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="8" deps="0" hasdep="0"/>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
<algo name="allreduce_pairs" proto="LL" nchannels="4" nchunksperloop="256" ngpus="8" coll="allreduce" inplace="1" outofplace="0" minBytes="20481" maxBytes="81919">
|
||||
<algo name="allreduce_pairs" proto="LL" nchannels="4" nchunksperloop="256" ngpus="8" coll="allreduce" inplace="1" outofplace="0" minBytes="25600" maxBytes="65536">
|
||||
<gpu id="0" i_chunks="256" o_chunks="0" s_chunks="224">
|
||||
<tb id="0" send="-1" recv="-1" chan="0">
|
||||
<step s="0" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="8" deps="1" hasdep="0"/>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
<algo name="allreduce_pairs" proto="LL" nchannels="8" nchunksperloop="512" ngpus="8" coll="allreduce" inplace="1" outofplace="0" minBytes="81920" maxBytes="1048575">
|
||||
<algo name="allreduce_pairs" proto="LL" nchannels="8" nchunksperloop="512" ngpus="8" coll="allreduce" inplace="1" outofplace="0" minBytes="65537" maxBytes="524287">
|
||||
<gpu id="0" i_chunks="512" o_chunks="0" s_chunks="448">
|
||||
<tb id="0" send="-1" recv="-1" chan="0">
|
||||
<step s="0" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="16" deps="1" hasdep="0"/>
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
<algo name="allreduce_pairs" proto="Simple" nchannels="8" nchunksperloop="512" ngpus="8" coll="allreduce" inplace="1" outofplace="0" minBytes="1048576" maxBytes="11534336">
|
||||
<algo name="allreduce_pairs" proto="Simple" nchannels="8" nchunksperloop="512" ngpus="8" coll="allreduce" inplace="1" outofplace="0" minBytes="524288" maxBytes="11534336">
|
||||
<gpu id="0" i_chunks="512" o_chunks="0" s_chunks="448">
|
||||
<tb id="0" send="-1" recv="-1" chan="0">
|
||||
<step s="0" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="16" deps="1" hasdep="0"/>
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user