From f0eae846630305b5c31f7958a9165e884ebef807 Mon Sep 17 00:00:00 2001
From: Nilesh M Negi
Date: Wed, 8 Jan 2025 08:38:51 -0600
Subject: [PATCH] [MSCCLPP] IBVerbs: Check if IBV_ACCESS_RELAXED_ORDERING exists (#1483)

Signed-off-by: nileshnegi
---
Reviewer notes (text between "---" and the first "diff --git" is commentary,
not part of the commit):
 - The nested mscclpp patch now calls add_compile_definitions() in the success
   branch. try_compile() only sets a CMake variable, so without this the
   "#if defined(HAS_IBV_ACCESS_RELAXED_ORDERING)" guard in src/ib.cc could never
   be true. Hunk counts and the diffstat were adjusted for the two added lines.
 - The "index" blob ids are unchanged from the original submission and no longer
   describe the patched content; regenerate them with git format-patch if they
   are needed (for example for "git apply --3way").
 - NOTE(review): indentation and blank-line placement of the context lines were
   reconstructed from a whitespace-collapsed copy. Confirm them against
   cmake/MSCCLPP.cmake and the pinned mscclpp tree before relying on a plain
   "git apply".

 cmake/MSCCLPP.cmake                           |  8 +++
 ext-src/check_ibv_access_relaxed_ordering.cc  |  8 +++
 .../mscclpp_ibv_access_relaxed_ordering.patch | 53 +++++++++++++++++++
 3 files changed, 69 insertions(+)
 create mode 100644 ext-src/check_ibv_access_relaxed_ordering.cc
 create mode 100644 ext-src/mscclpp_ibv_access_relaxed_ordering.patch

diff --git a/cmake/MSCCLPP.cmake b/cmake/MSCCLPP.cmake
index a20a4033e9..e38922d5e7 100644
--- a/cmake/MSCCLPP.cmake
+++ b/cmake/MSCCLPP.cmake
@@ -71,6 +71,10 @@ if(ENABLE_MSCCLPP)
             COMMAND git apply ${CMAKE_CURRENT_SOURCE_DIR}/ext-src/read-allred.patch
             WORKING_DIRECTORY ${MSCCLPP_SOURCE}
         )
+        execute_process(
+            COMMAND git apply ${CMAKE_CURRENT_SOURCE_DIR}/ext-src/mscclpp_ibv_access_relaxed_ordering.patch
+            WORKING_DIRECTORY ${MSCCLPP_SOURCE}
+        )
 
         message(STATUS "Building mscclpp only for gfx942.")
 
@@ -105,6 +109,10 @@ if(ENABLE_MSCCLPP)
             COMMAND git apply --reverse ${CMAKE_CURRENT_SOURCE_DIR}/ext-src/read-allred.patch
             WORKING_DIRECTORY ${MSCCLPP_SOURCE}
         )
+        execute_process(
+            COMMAND git apply --reverse ${CMAKE_CURRENT_SOURCE_DIR}/ext-src/mscclpp_ibv_access_relaxed_ordering.patch
+            WORKING_DIRECTORY ${MSCCLPP_SOURCE}
+        )
     endif()
 
     execute_process(COMMAND objcopy
diff --git a/ext-src/check_ibv_access_relaxed_ordering.cc b/ext-src/check_ibv_access_relaxed_ordering.cc
new file mode 100644
index 0000000000..65c675a109
--- /dev/null
+++ b/ext-src/check_ibv_access_relaxed_ordering.cc
@@ -0,0 +1,8 @@
+#include <infiniband/verbs.h>
+#include <stdio.h>
+
+int main(void) {
+    enum ibv_access_flags has_ibv_access_relaxed_ordering = IBV_ACCESS_RELAXED_ORDERING;
+    printf("IBV_ACCESS_RELAXED_ORDERING: %d\n", has_ibv_access_relaxed_ordering);
+    return 0;
+}
diff --git a/ext-src/mscclpp_ibv_access_relaxed_ordering.patch b/ext-src/mscclpp_ibv_access_relaxed_ordering.patch
new file mode 100644
index 0000000000..bd776ec8fc
--- /dev/null
+++ b/ext-src/mscclpp_ibv_access_relaxed_ordering.patch
@@ -0,0 +1,53 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index a95a8e5..62b4f22 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -96,6 +96,26 @@ include(${PROJECT_SOURCE_DIR}/cmake/AddFormatTargets.cmake)
+ 
+ # Find ibverbs and libnuma
+ find_package(IBVerbs)
++
++# Check if IBV_ACCESS_RELAXED_ORDERING exists in infiniband/verbs.h
++# Disable use of this symbol in mscclpp/src/ib.cc if it does not exist
++if(IBVERBS_FOUND)
++    try_compile(HAS_IBV_ACCESS_RELAXED_ORDERING
++        ${CMAKE_BINARY_DIR}
++        "${CMAKE_CURRENT_SOURCE_DIR}/../check_ibv_access_relaxed_ordering.cc"
++        CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${IBVERBS_INCLUDE_DIRS}"
++        OUTPUT_VARIABLE try_compile_output
++    )
++    message(STATUS "try_compile_output: ${try_compile_output}")
++    if(NOT HAS_IBV_ACCESS_RELAXED_ORDERING)
++        message(WARNING "IBV_ACCESS_RELAXED_ORDERING does not exist in ${IBVERBS_INCLUDE_DIRS}/infiniband/verbs.h. Disabling this symbol in mscclpp/src/ib.cc.")
++    else()
++        message(STATUS "IBV_ACCESS_RELAXED_ORDERING exists in ${IBVERBS_INCLUDE_DIRS}/infiniband/verbs.h.")
++        # try_compile() only sets a CMake variable; export it so the #if guard in src/ib.cc sees it.
++        add_compile_definitions(HAS_IBV_ACCESS_RELAXED_ORDERING)
++    endif()
++endif()
++
+ find_package(NUMA REQUIRED)
+ find_package(Threads REQUIRED)
+ 
+diff --git a/src/ib.cc b/src/ib.cc
+index d9d72d1..bddd4a8 100644
+--- a/src/ib.cc
++++ b/src/ib.cc
+@@ -48,9 +48,17 @@ IbMr::IbMr(ibv_pd* pd, void* buff, std::size_t size) : buff(buff) {
+   }
+   uintptr_t addr = reinterpret_cast<uintptr_t>(buff) & -pageSize;
+   std::size_t pages = (size + (reinterpret_cast<uintptr_t>(buff) - addr) + pageSize - 1) / pageSize;
++
++#if defined(HAS_IBV_ACCESS_RELAXED_ORDERING)
+   this->mr = IBVerbs::ibv_reg_mr2(pd, reinterpret_cast<void*>(addr), pages * pageSize,
+                                   IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ |
+                                       IBV_ACCESS_RELAXED_ORDERING | IBV_ACCESS_REMOTE_ATOMIC);
++#else
++  this->mr = IBVerbs::ibv_reg_mr2(pd, reinterpret_cast<void*>(addr), pages * pageSize,
++                                  IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ |
++                                      IBV_ACCESS_REMOTE_ATOMIC);
++#endif
++
+   if (this->mr == nullptr) {
+     std::stringstream err;
+     err << "ibv_reg_mr failed (errno " << errno << ")";