From a64aab5f63410af54482a63f401b4b8d5459e048 Mon Sep 17 00:00:00 2001
From: Wenkai Du <43822138+wenkaidu@users.noreply.github.com>
Date: Wed, 8 May 2024 14:32:24 -0700
Subject: [PATCH] Use rocm-smi thread only mutex when available (#1169)

---
Review notes (free-text area between the "---" separator and the diffstat;
not part of any hunk):

  * CMakeLists.txt, first hunk: at configure time the build reads
    ${ROCM_SMI_INCLUDE_DIR}/rocm_smi/rocm_smi.h into a variable and searches
    it for the literal string RSMI_INIT_FLAG_THRAD_ONLY_MUTEX. A result of -1
    from string(FIND) is reported as "not supported"; any other result is
    reported as "supported" and sets HAVE_ROCM_SMI_THREAD_ONLY_MUTEX.
  * CMakeLists.txt, second hunk: when HAVE_ROCM_SMI_THREAD_ONLY_MUTEX is set,
    the PRIVATE compile definition USE_ROCM_SMI_THREAD_ONLY_MUTEX is added to
    the rccl target.
  * src/misc/rocm_smi_wrap.cc: rocm_smi_init() calls
    rsmi_init(RSMI_INIT_FLAG_THRAD_ONLY_MUTEX) when that definition is
    present and rsmi_init(0) otherwise.
  * "THRAD" is spelled the same way in the CMake search string and in the C++
    identifier. Presumably this matches the declaration in rocm_smi.h, so it
    must not be "corrected" in one place only -- TODO confirm against the
    header.
  * The file(READ) call has no existence guard. Presumably configuration
    stops with an error if rocm_smi.h is not at that path -- TODO confirm
    that this is the intended behaviour.
  * NOTE(review): the line breaks and indentation of this copy were
    reconstructed from a whitespace-collapsed version. Verify the whitespace
    of context lines against the upstream commit before applying.

 CMakeLists.txt            | 12 ++++++++++++
 src/misc/rocm_smi_wrap.cc |  4 ++++
 2 files changed, 16 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 29f1404322..4a22ba3c58 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -175,6 +175,15 @@ else()
   set(ROCM_SMI_LIBRARIES rocm_smi64)
 endif()
 check_include_file_cxx("${ROCM_SMI_INCLUDE_DIR}/rocm_smi/rocm_smi64Config.h" HAVE_ROCM_SMI64CONFIG)
+### Check for RSMI_INIT_FLAG_THRAD_ONLY_MUTEX support
+file(READ "${ROCM_SMI_INCLUDE_DIR}/rocm_smi/rocm_smi.h" rocm_smi_incl)
+string(FIND "${rocm_smi_incl}" "RSMI_INIT_FLAG_THRAD_ONLY_MUTEX" matchres)
+if(${matchres} EQUAL -1)
+  message(STATUS "RSMI_INIT_FLAG_THRAD_ONLY_MUTEX not supported")
+else()
+  message(STATUS "RSMI_INIT_FLAG_THRAD_ONLY_MUTEX supported")
+  set(HAVE_ROCM_SMI_THREAD_ONLY_MUTEX True)
+endif ()
 
 ## Check for BFD library if custom backtrace is requested
 if(BUILD_BFD)
@@ -543,6 +552,9 @@ endif()
 if(HAVE_ROCM_SMI64CONFIG)
   target_compile_definitions(rccl PRIVATE USE_ROCM_SMI64CONFIG)
 endif()
+if(HAVE_ROCM_SMI_THREAD_ONLY_MUTEX)
+  target_compile_definitions(rccl PRIVATE USE_ROCM_SMI_THREAD_ONLY_MUTEX)
+endif()
 if(NPKIT_FLAGS)
   target_compile_definitions(rccl PRIVATE ${NPKIT_FLAGS})
 endif()
diff --git a/src/misc/rocm_smi_wrap.cc b/src/misc/rocm_smi_wrap.cc
index 82b70a04de..4c8987713f 100644
--- a/src/misc/rocm_smi_wrap.cc
+++ b/src/misc/rocm_smi_wrap.cc
@@ -35,7 +35,11 @@ THE SOFTWARE.
ncclResult_t rocm_smi_init() { +#ifdef USE_ROCM_SMI_THREAD_ONLY_MUTEX + ROCMSMICHECK(rsmi_init(RSMI_INIT_FLAG_THRAD_ONLY_MUTEX)); +#else ROCMSMICHECK(rsmi_init(0)); +#endif rsmi_version_t version; ROCMSMICHECK(rsmi_version_get(&version)); INFO(NCCL_INIT, "rocm_smi_lib: version %d.%d.%d.%s", version.major, version.minor, version.patch, version.build);