Fix topo_explorer compatibility and capture WarpSize (#1743)
[ROCm/rccl commit: fb4ad82d0d]
이 커밋은 다음에 포함됨:
@@ -2,8 +2,14 @@
|
||||
|
||||
#ifndef DEVICE_TABLE_COMPATIBILITY
|
||||
#define DEVICE_TABLE_COMPATIBILITY
|
||||
// No-op device stub: accepts a function index and does nothing (empty body).
// NOTE(review): presumably a placeholder so code referencing this name still
// builds under DEVICE_TABLE_COMPATIBILITY — confirm against callers.
__forceinline__ __device__ void NCCL_CALL_FUNCTIONS(unsigned short funcIndex) noexcept {}
|
||||
// No-op device stub: accepts a function index and does nothing (empty body).
// NOTE(review): the `_1` suffix presumably names an unroll variant — confirm.
__forceinline__ __device__ void NCCL_CALL_FUNCTIONS_1(unsigned short funcIndex) noexcept {}
|
||||
// No-op device stub: accepts a function index and does nothing (empty body).
// NOTE(review): the `_2` suffix presumably names an unroll variant — confirm.
__forceinline__ __device__ void NCCL_CALL_FUNCTIONS_2(unsigned short funcIndex) noexcept {}
|
||||
// No-op device stub: accepts a function index and does nothing (empty body).
// NOTE(review): the `_4` suffix presumably names an unroll variant — confirm.
__forceinline__ __device__ void NCCL_CALL_FUNCTIONS_4(unsigned short funcIndex) noexcept {}
|
||||
|
||||
// Plain record with two fields: an untyped pointer (`funcPtr`) and an integer
// (`unroll`). It is the element type of rcclKernelTable.
// NOTE(review): presumably `funcPtr` is a kernel entry point and `unroll` the
// unroll factor that kernel was built with — confirm against the real table
// definition this compatibility stub mirrors.
struct rcclKernelItem {
|
||||
void* funcPtr;
|
||||
int unroll;
|
||||
};
|
||||
// File-local table of rcclKernelItem entries, initialised with no elements.
// NOTE(review): an empty initializer for an array of unknown bound is not
// valid ISO C++; this relies on the compiler's zero-length-array extension.
// Confirm the toolchain in use accepts it and that no code indexes the table.
static struct rcclKernelItem rcclKernelTable[] = { };
|
||||
|
||||
// No-op device stub templated on an integer `unroll`: accepts a function
// index and does nothing (empty body). The template parameter is unused here.
// NOTE(review): presumably mirrors the signature of the real dispatch function
// so callers written as NCCL_CALL_FUNCTIONS<N>(idx) still compile — confirm.
template <int unroll>
|
||||
__forceinline__ __device__ void NCCL_CALL_FUNCTIONS(unsigned short funcIndex) noexcept { }
|
||||
|
||||
#endif
|
||||
|
||||
@@ -255,11 +255,13 @@ int main(int argc,char* argv[])
|
||||
node_model = network.GetNode(i);
|
||||
assert(node_model!=0);
|
||||
initTransportsRank_3(&comm[i], allGather3Data, treeGraph[i], ringGraph[i], collNetGraph[i], nvlsGraph[i]);
|
||||
CUDACHECK(hipDeviceGetAttribute(&comm[i].WarpSize, hipDeviceAttributeWarpSize, comm[i].cudaDev));
|
||||
}
|
||||
for (uint64_t len = 8; len <= 4294967296L; len *= 2) {
|
||||
struct ncclInfo info;
|
||||
float minTime = 3600000000.0;
|
||||
info.comm = &comm[0];
|
||||
|
||||
info.coll = ncclFuncAllReduce;
|
||||
// Find algorithm / protocol.
|
||||
int algorithm = -1;
|
||||
|
||||
새 이슈에서 참조
사용자 차단