Optimize NCHANNELS and MSCCL config for gfx942 80CUs (#1195)
* Optimize NCHANNELS and MSCCL config for gfx942 80CUs
Set appropriately for different NCCL_MIN_NCHANNELS and MSCCL config,
potentially improving communication perf on the MI300x 80CUs
* Delete tools/msccl-algorithms/allreduce_1step_mccl_8_2_16777216_LL.xml
* Change the factor of gfx94 and update msccl config
[ROCm/rccl commit: cab25f919e]
This commit is contained in:
@@ -1390,6 +1390,18 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, struct ncclComm* p
|
||||
allGather3Data[rank].nc = std::max(allGather3Data[rank].nc, 4/ringGraph.nChannels);
|
||||
if (ringGraph.nChannels > MAXCHANNELS/2)
|
||||
allGather3Data[rank].nc = 1;
|
||||
if (IsArchMatch(comm->topo->nodes[GPU].nodes[idx].gpu.gcn, "gfx94")) {
|
||||
if (nranks == 2)
|
||||
// NCCL_MIN_NCHANNELS=32
|
||||
allGather3Data[rank].nc = 16;
|
||||
else if (nranks == 4)
|
||||
// NCCL_MIN_NCHANNELS=24
|
||||
allGather3Data[rank].nc = 6;
|
||||
else if (nranks == 8)
|
||||
// NCCL_MIN_NCHANNELS=56
|
||||
allGather3Data[rank].nc = 2;
|
||||
}
|
||||
|
||||
allGather3Data[rank].pivotA2AEnabled = comm->topo->pivotA2AEnabled && rcclParamPivotAlltoallEnable();
|
||||
comm->topo->ll128Enabled = comm->topo->ll128Enabled || rcclParamLL128ForceEnable();
|
||||
allGather3Data[rank].ll128Enabled = comm->topo->ll128Enabled;
|
||||
@@ -2788,4 +2800,4 @@ exit:
|
||||
return ret;
|
||||
fail:
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,306 @@
|
||||
<algo name="allreduce_pairs" proto="LL" nchannels="2" nchunksperloop="8" ngpus="4" coll="allreduce" inplace="1" outofplace="0" minBytes="2048" maxBytes="13312">
|
||||
<gpu id="0" i_chunks="8" o_chunks="0" s_chunks="24">
|
||||
<tb id="0" send="-1" recv="-1" chan="0">
|
||||
<step s="0" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="0" hasdep="0"/>
|
||||
<step s="1" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="0" hasdep="0"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="1" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="0" dstbuf="i" dstoff="0" cnt="1" depid="2" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="4" dstbuf="i" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="8" dstbuf="i" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="1" send="-1" recv="-1" chan="1">
|
||||
<step s="0" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="0" hasdep="0"/>
|
||||
<step s="1" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="0" hasdep="0"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="1" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="12" dstbuf="i" dstoff="4" cnt="1" depid="3" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="16" dstbuf="i" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="20" dstbuf="i" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="2" send="1" recv="1" chan="0">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="0" dstbuf="s" dstoff="0" cnt="4" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="0" dstbuf="s" dstoff="0" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="1" hasdep="0"/>
|
||||
<step s="4" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="1" dstbuf="i" dstoff="1" cnt="1" depid="4" deps="0" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="5" dstbuf="i" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="7" type="re" srcbuf="s" srcoff="9" dstbuf="i" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="3" send="1" recv="1" chan="1">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="4" dstbuf="s" dstoff="12" cnt="4" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="4" dstbuf="s" dstoff="12" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="1" hasdep="0"/>
|
||||
<step s="4" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="13" dstbuf="i" dstoff="5" cnt="1" depid="5" deps="0" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="17" dstbuf="i" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="7" type="re" srcbuf="s" srcoff="21" dstbuf="i" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="4" send="2" recv="2" chan="0">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="0" dstbuf="s" dstoff="0" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="0" dstbuf="s" dstoff="4" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="2" dstbuf="i" dstoff="2" cnt="1" depid="2" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="6" dstbuf="i" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="10" dstbuf="i" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="5" send="2" recv="2" chan="1">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="4" dstbuf="s" dstoff="12" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="4" dstbuf="s" dstoff="16" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="14" dstbuf="i" dstoff="6" cnt="1" depid="3" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="18" dstbuf="i" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="22" dstbuf="i" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="6" send="3" recv="3" chan="0">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="0" dstbuf="s" dstoff="0" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="0" dstbuf="s" dstoff="8" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="3" dstbuf="i" dstoff="3" cnt="1" depid="2" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="7" dstbuf="i" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="11" dstbuf="i" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="7" send="3" recv="3" chan="1">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="4" dstbuf="s" dstoff="12" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="4" dstbuf="s" dstoff="20" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="15" dstbuf="i" dstoff="7" cnt="1" depid="3" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="19" dstbuf="i" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="23" dstbuf="i" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
</gpu>
|
||||
<gpu id="1" i_chunks="8" o_chunks="0" s_chunks="24">
|
||||
<tb id="0" send="-1" recv="-1" chan="0">
|
||||
<step s="0" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="0" hasdep="0"/>
|
||||
<step s="1" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="0" hasdep="0"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="1" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="1" dstbuf="i" dstoff="1" cnt="1" depid="2" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="5" dstbuf="i" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="9" dstbuf="i" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="1" send="-1" recv="-1" chan="1">
|
||||
<step s="0" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="0" hasdep="0"/>
|
||||
<step s="1" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="0" hasdep="0"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="1" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="13" dstbuf="i" dstoff="5" cnt="1" depid="3" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="17" dstbuf="i" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="21" dstbuf="i" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="2" send="0" recv="0" chan="0">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="0" dstbuf="s" dstoff="0" cnt="4" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="0" dstbuf="s" dstoff="0" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="1" hasdep="0"/>
|
||||
<step s="4" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="0" dstbuf="i" dstoff="0" cnt="1" depid="4" deps="0" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="4" dstbuf="i" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="7" type="re" srcbuf="s" srcoff="8" dstbuf="i" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="3" send="0" recv="0" chan="1">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="4" dstbuf="s" dstoff="12" cnt="4" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="4" dstbuf="s" dstoff="12" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="1" hasdep="0"/>
|
||||
<step s="4" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="12" dstbuf="i" dstoff="4" cnt="1" depid="5" deps="0" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="16" dstbuf="i" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="7" type="re" srcbuf="s" srcoff="20" dstbuf="i" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="4" send="2" recv="2" chan="0">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="0" dstbuf="s" dstoff="4" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="0" dstbuf="s" dstoff="4" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="2" dstbuf="i" dstoff="2" cnt="1" depid="2" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="6" dstbuf="i" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="10" dstbuf="i" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="5" send="2" recv="2" chan="1">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="4" dstbuf="s" dstoff="16" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="4" dstbuf="s" dstoff="16" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="14" dstbuf="i" dstoff="6" cnt="1" depid="3" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="18" dstbuf="i" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="22" dstbuf="i" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="6" send="3" recv="3" chan="0">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="0" dstbuf="s" dstoff="4" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="0" dstbuf="s" dstoff="8" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="3" dstbuf="i" dstoff="3" cnt="1" depid="2" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="7" dstbuf="i" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="11" dstbuf="i" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="7" send="3" recv="3" chan="1">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="4" dstbuf="s" dstoff="16" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="4" dstbuf="s" dstoff="20" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="15" dstbuf="i" dstoff="7" cnt="1" depid="3" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="19" dstbuf="i" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="23" dstbuf="i" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
</gpu>
|
||||
<gpu id="2" i_chunks="8" o_chunks="0" s_chunks="24">
|
||||
<tb id="0" send="-1" recv="-1" chan="0">
|
||||
<step s="0" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="0" hasdep="0"/>
|
||||
<step s="1" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="0" hasdep="0"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="1" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="2" dstbuf="i" dstoff="2" cnt="1" depid="2" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="6" dstbuf="i" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="10" dstbuf="i" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="1" send="-1" recv="-1" chan="1">
|
||||
<step s="0" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="0" hasdep="0"/>
|
||||
<step s="1" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="0" hasdep="0"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="1" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="14" dstbuf="i" dstoff="6" cnt="1" depid="3" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="18" dstbuf="i" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="22" dstbuf="i" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="2" send="0" recv="0" chan="0">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="0" dstbuf="s" dstoff="4" cnt="4" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="0" dstbuf="s" dstoff="0" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="1" hasdep="0"/>
|
||||
<step s="4" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="0" dstbuf="i" dstoff="0" cnt="1" depid="6" deps="0" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="4" dstbuf="i" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="7" type="re" srcbuf="s" srcoff="8" dstbuf="i" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="3" send="0" recv="0" chan="1">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="4" dstbuf="s" dstoff="16" cnt="4" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="4" dstbuf="s" dstoff="12" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="1" hasdep="0"/>
|
||||
<step s="4" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="12" dstbuf="i" dstoff="4" cnt="1" depid="7" deps="0" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="16" dstbuf="i" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="7" type="re" srcbuf="s" srcoff="20" dstbuf="i" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="4" send="1" recv="1" chan="0">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="0" dstbuf="s" dstoff="4" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="0" dstbuf="s" dstoff="4" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="1" dstbuf="i" dstoff="1" cnt="1" depid="2" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="5" dstbuf="i" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="9" dstbuf="i" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="5" send="1" recv="1" chan="1">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="4" dstbuf="s" dstoff="16" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="4" dstbuf="s" dstoff="16" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="13" dstbuf="i" dstoff="5" cnt="1" depid="3" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="17" dstbuf="i" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="21" dstbuf="i" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="6" send="3" recv="3" chan="0">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="0" dstbuf="s" dstoff="8" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="0" dstbuf="s" dstoff="8" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="3" dstbuf="i" dstoff="3" cnt="1" depid="2" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="7" dstbuf="i" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="11" dstbuf="i" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="7" send="3" recv="3" chan="1">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="4" dstbuf="s" dstoff="20" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="4" dstbuf="s" dstoff="20" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="15" dstbuf="i" dstoff="7" cnt="1" depid="3" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="19" dstbuf="i" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="23" dstbuf="i" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
</gpu>
|
||||
<gpu id="3" i_chunks="8" o_chunks="0" s_chunks="24">
|
||||
<tb id="0" send="-1" recv="-1" chan="0">
|
||||
<step s="0" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="0" hasdep="0"/>
|
||||
<step s="1" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="0" hasdep="0"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="1" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="3" dstbuf="i" dstoff="3" cnt="1" depid="2" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="7" dstbuf="i" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="11" dstbuf="i" dstoff="3" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="1" send="-1" recv="-1" chan="1">
|
||||
<step s="0" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="0" hasdep="0"/>
|
||||
<step s="1" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="0" hasdep="0"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="1" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="15" dstbuf="i" dstoff="7" cnt="1" depid="3" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="19" dstbuf="i" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="23" dstbuf="i" dstoff="7" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="2" send="0" recv="0" chan="0">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="0" dstbuf="s" dstoff="8" cnt="4" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="0" dstbuf="s" dstoff="0" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="1" hasdep="0"/>
|
||||
<step s="4" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="0" dstbuf="i" dstoff="0" cnt="1" depid="4" deps="0" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="4" dstbuf="i" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="7" type="re" srcbuf="s" srcoff="8" dstbuf="i" dstoff="0" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="3" send="0" recv="0" chan="1">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="4" dstbuf="s" dstoff="20" cnt="4" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="4" dstbuf="s" dstoff="12" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="1" hasdep="0"/>
|
||||
<step s="4" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="12" dstbuf="i" dstoff="4" cnt="1" depid="5" deps="0" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="16" dstbuf="i" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="7" type="re" srcbuf="s" srcoff="20" dstbuf="i" dstoff="4" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="4" send="1" recv="1" chan="0">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="0" dstbuf="s" dstoff="8" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="0" dstbuf="s" dstoff="4" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="6" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="1" dstbuf="i" dstoff="1" cnt="1" depid="2" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="5" dstbuf="i" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="9" dstbuf="i" dstoff="1" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="5" send="1" recv="1" chan="1">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="4" dstbuf="s" dstoff="20" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="4" dstbuf="s" dstoff="16" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="7" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="13" dstbuf="i" dstoff="5" cnt="1" depid="3" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="17" dstbuf="i" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="21" dstbuf="i" dstoff="5" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="6" send="2" recv="2" chan="0">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="0" dstbuf="s" dstoff="8" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="0" dstbuf="s" dstoff="8" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="4" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="2" dstbuf="i" dstoff="2" cnt="1" depid="2" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="6" dstbuf="i" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="10" dstbuf="i" dstoff="2" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
<tb id="7" send="2" recv="2" chan="1">
|
||||
<step s="0" type="s" srcbuf="i" srcoff="4" dstbuf="s" dstoff="20" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="1" type="r" srcbuf="i" srcoff="4" dstbuf="s" dstoff="20" cnt="4" depid="-1" deps="-1" hasdep="1"/>
|
||||
<step s="2" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="0" hasdep="0"/>
|
||||
<step s="3" type="nop" srcbuf="i" srcoff="-1" dstbuf="o" dstoff="-1" cnt="0" depid="5" deps="1" hasdep="0"/>
|
||||
<step s="4" type="re" srcbuf="s" srcoff="14" dstbuf="i" dstoff="6" cnt="1" depid="3" deps="1" hasdep="0"/>
|
||||
<step s="5" type="re" srcbuf="s" srcoff="18" dstbuf="i" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
<step s="6" type="re" srcbuf="s" srcoff="22" dstbuf="i" dstoff="6" cnt="1" depid="-1" deps="-1" hasdep="0"/>
|
||||
</tb>
|
||||
</gpu>
|
||||
</algo>
|
||||
Reference in New Issue
Block a user