From 2b9394f08aef38e9cb682048d2bfcd727129cf60 Mon Sep 17 00:00:00 2001 From: qiwei_ji Date: Tue, 7 Jan 2025 09:09:27 +0800 Subject: [PATCH] Check nvlink_node instead of xgmi_node in xml.cc (#1407) It seems the code here is meant to check the xgmi node instead. If it checks the node for "nvlink", it will verify the link_info every time. If it checks the node for "xgmi" and the node is found, it does not need to check the vsmi topo interface. [ROCm/rccl commit: f2ee8d913270164bc7b272445f9772d5d0015b0e] --- projects/rccl/src/graph/xml.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/projects/rccl/src/graph/xml.cc b/projects/rccl/src/graph/xml.cc index de7fcb1253..c03d3a3aee 100644 --- a/projects/rccl/src/graph/xml.cc +++ b/projects/rccl/src/graph/xml.cc @@ -766,7 +766,11 @@ ncclResult_t ncclTopoGetXmlFromGpu(struct ncclXmlNode* pciNode, uint32_t rocmDev NCCLCHECK(xmlGetAttrInt(gpuNode, "arch", &arch.value)); struct ncclXmlNode* nvlNode = NULL; +#if defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__) + NCCLCHECK(xmlGetSub(gpuNode, "xgmi", &nvlNode)); +#else NCCLCHECK(xmlGetSub(gpuNode, "nvlink", &nvlNode)); +#endif if (nvlNode == NULL) { #if defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__) const char* busId;