From 1f7945286c6f91ccf711431bf29fbcfb01e00142 Mon Sep 17 00:00:00 2001
From: Nusrat Islam
Date: Wed, 2 Oct 2024 11:52:04 -0500
Subject: [PATCH] Enable MSCCLPP use in CPX mode (#1355)

This PR enables the use of MSCCLPP in CPX mode for 8 GPUs.

The mscclpp source patch is applied idempotently at configure time: CMake
first checks whether the patch is already present and only applies it when
it is not. A failed apply is reported as a CMake warning instead of being
ignored silently.

[ROCm/rccl commit: d13f9c44f5d5ae4053e696af282f93a9c28cb471]
---
Note: the post-image blob ids on the "index" lines below predate the
review edits in this version; regenerate with `git format-patch` if exact
ids are required (plain `git apply` / `git am` do not verify them).

 projects/rccl/cmake/MSCCLPP.cmake | 21 ++++++++++++++++++++-
 projects/rccl/ext-src/cpx.patch   | 15 +++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)
 create mode 100644 projects/rccl/ext-src/cpx.patch

diff --git a/projects/rccl/cmake/MSCCLPP.cmake b/projects/rccl/cmake/MSCCLPP.cmake
index f0d60a3e87..54e3fb9654 100644
--- a/projects/rccl/cmake/MSCCLPP.cmake
+++ b/projects/rccl/cmake/MSCCLPP.cmake
@@ -63,7 +63,26 @@ if(ENABLE_MSCCLPP)
                 WORKING_DIRECTORY ${MSCCLPP_SOURCE}
             )
         endif()
-
+        # Apply the CPX-mode patch to the mscclpp sources. A reverse dry-run
+        # succeeds only when the patch is already present, so re-running CMake
+        # on an already-patched tree skips the apply instead of failing.
+        execute_process(
+            COMMAND git apply --reverse --check ${CMAKE_CURRENT_SOURCE_DIR}/ext-src/cpx.patch
+            WORKING_DIRECTORY ${MSCCLPP_SOURCE}
+            RESULT_VARIABLE MSCCLPP_CPX_PATCH_APPLIED
+            OUTPUT_QUIET
+            ERROR_QUIET
+        )
+        if(NOT MSCCLPP_CPX_PATCH_APPLIED EQUAL 0)
+            execute_process(
+                COMMAND git apply ${CMAKE_CURRENT_SOURCE_DIR}/ext-src/cpx.patch
+                WORKING_DIRECTORY ${MSCCLPP_SOURCE}
+                RESULT_VARIABLE MSCCLPP_CPX_PATCH_RESULT
+            )
+            if(NOT MSCCLPP_CPX_PATCH_RESULT EQUAL 0)
+                message(WARNING "Failed to apply ext-src/cpx.patch to ${MSCCLPP_SOURCE}; mscclpp will be built without the CPX-mode change.")
+            endif()
+        endif()
         message(STATUS "Building mscclpp only for gfx942.")
 
         mscclpp_cmake_arg(CMAKE_PREFIX_PATH)
diff --git a/projects/rccl/ext-src/cpx.patch b/projects/rccl/ext-src/cpx.patch
new file mode 100644
index 0000000000..1b11521bd1
--- /dev/null
+++ b/projects/rccl/ext-src/cpx.patch
@@ -0,0 +1,15 @@
+diff --git a/src/numa.cc b/src/numa.cc
+index d72c99e..16c903d 100644
+--- a/src/numa.cc
++++ b/src/numa.cc
+@@ -26,6 +26,10 @@ namespace mscclpp {
+ 
+ MSCCLPP_API_CPP int getDeviceNumaNode(int cudaDev) {
+   std::string busId = getBusId(cudaDev);
++  // CPX mode: look up numa_node under the bus id whose final character is
++  // '0' (presumably PCI function 0 -- confirm against getBusId's format).
++  // Guard the write so an empty bus id cannot index out of bounds.
++  if (!busId.empty()) busId.back() = '0';
+   std::string file_str = "/sys/bus/pci/devices/" + busId + "/numa_node";
+   std::ifstream file(file_str);
+   int numaNode;