diff --git a/tools/topo_expl/include/device_table.h b/tools/topo_expl/include/device_table.h
index 8610626558..d37b0d2215 100644
--- a/tools/topo_expl/include/device_table.h
+++ b/tools/topo_expl/include/device_table.h
@@ -2,8 +2,14 @@
 #ifndef DEVICE_TABLE_COMPATIBILITY
 #define DEVICE_TABLE_COMPATIBILITY
-__forceinline__ __device__ void NCCL_CALL_FUNCTIONS(unsigned short funcIndex) noexcept {}
-__forceinline__ __device__ void NCCL_CALL_FUNCTIONS_1(unsigned short funcIndex) noexcept {}
-__forceinline__ __device__ void NCCL_CALL_FUNCTIONS_2(unsigned short funcIndex) noexcept {}
-__forceinline__ __device__ void NCCL_CALL_FUNCTIONS_4(unsigned short funcIndex) noexcept {}
+
+struct rcclKernelItem {
+ void* funcPtr;
+ int unroll;
+};
+static struct rcclKernelItem rcclKernelTable[] = { };
+
+template
+__forceinline__ __device__ void NCCL_CALL_FUNCTIONS(unsigned short funcIndex) noexcept { }
+
 #endif
diff --git a/tools/topo_expl/topo_expl.cpp b/tools/topo_expl/topo_expl.cpp
index 56455c833b..e63387d59a 100644
--- a/tools/topo_expl/topo_expl.cpp
+++ b/tools/topo_expl/topo_expl.cpp
@@ -255,11 +255,13 @@ int main(int argc,char* argv[])
 node_model = network.GetNode(i);
 assert(node_model!=0);
 initTransportsRank_3(&comm[i], allGather3Data, treeGraph[i], ringGraph[i], collNetGraph[i], nvlsGraph[i]);
+ CUDACHECK(hipDeviceGetAttribute(&comm[i].WarpSize, hipDeviceAttributeWarpSize, comm[i].cudaDev));
 }
 for (uint64_t len = 8; len <= 4294967296L; len *= 2) {
 struct ncclInfo info;
 float minTime = 3600000000.0;
 info.comm = &comm[0];
+ info.coll = ncclFuncAllReduce;
 // Find algorithm / protocol.
 int algorithm = -1;