diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0cbad9aa11..38f0848157 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,7 +29,7 @@ option(ENABLE_MSCCL_KERNEL "Enable MSCCL while compiling"
 option(ENABLE_MSCCLPP "Enable MSCCL++" ON)
 option(ENABLE_IFC "Enable indirect function call" OFF)
 option(INSTALL_DEPENDENCIES "Force install dependencies" OFF)
-option(ROCTX "Enable ROCTX" OFF)
+option(ROCTX "Enable ROCTX" ON)
 option(PROFILE "Enable profiling" OFF)
 option(TIMETRACE "Enable time-trace during compilation" OFF)
 option(TRACE "Enable additional tracing" OFF)
@@ -320,6 +320,17 @@ if (ENABLE_MSCCLPP AND NOT(${HOST_OS_ID} STREQUAL "ubuntu" OR ${HOST_OS_ID} STRE
   message(WARNING "MSCCL++ integration not supported on this OS (${HOST_OS_ID}); disabling MSCCL++ build")
 endif()
 
+# Check for ROCTX: ROCTX_ENABLE is only set when the roctx64 library is actually found
+if(ROCTX)
+  find_library(ROCTX_LIB NAMES roctx64)
+  if(ROCTX_LIB)
+    set(ROCTX_ENABLE ON)
+    message(STATUS "ROCTX library found: ${ROCTX_LIB}")
+  else()
+    message(WARNING "ROCTX library not found. Skipping ROCTX linking.")
+  endif()
+endif()
+
 # Determine version from makefiles/version.mk and fill in templates
 #==================================================================================================
 ## parse version from Makefile NCCL_MAJOR, NCCL_MINOR, NCCL_PATCH must exist
@@ -701,9 +712,10 @@ endif()
 if(PROFILE)
   target_compile_definitions(rccl PRIVATE ENABLE_PROFILING)
 endif()
-if(NOT ROCTX)
+if(ROCTX_ENABLE)
+  target_compile_definitions(rccl PRIVATE ROCTX_ENABLE)
+else()
   target_compile_definitions(rccl PRIVATE NVTX_NO_IMPL)
-  target_compile_definitions(rccl PRIVATE ROCTX_NO_IMPL)
   target_compile_definitions(rccl PRIVATE NVTX_DISABLE)
 endif()
 if(TRACE)
@@ -802,10 +814,11 @@ if (HAVE_BFD)
     target_link_libraries(rccl PRIVATE iberty z)
   endif()
 endif()
-if (ROCTX)
-  target_link_libraries(rccl PRIVATE -lroctx64)
+if (ROCTX_ENABLE)
+  # Link the exact library located by find_library() instead of relying on the linker search path
+  target_link_libraries(rccl PRIVATE ${ROCTX_LIB})
 endif()
-target_link_libraries(rccl PRIVATE -fgpu-rdc) # Required when linking relocatable device code
+target_link_libraries(rccl PRIVATE -fgpu-rdc) # Required when linking relocatable device code
 target_link_libraries(rccl PRIVATE Threads::Threads)
 target_link_libraries(rccl INTERFACE hip::host)
 target_link_libraries(rccl PRIVATE hip::device)
diff --git a/install.sh b/install.sh
index d393277dde..c174be03ea 100755
--- a/install.sh
+++ b/install.sh
@@ -28,7 +28,7 @@ mscclpp_enabled=true
 num_parallel_jobs=$(nproc)
 npkit_enabled=false
 openmp_test_enabled=false
-roctx_enabled=false
+roctx_enabled=true
 run_tests=false
 run_tests_all=false
 time_trace=false
@@ -47,6 +47,7 @@ function display_help()
     echo " --disable-colltrace Build without collective trace"
     echo " --disable-msccl-kernel Build without MSCCL kernels"
     echo " --disable-mscclpp Build without MSCCL++ support"
+    echo " --disable-roctx Build without ROCTX logging"
     echo " -f|--fast Quick-build RCCL (local gpu arch only, no backtrace, and collective trace support)"
     echo " -h|--help Prints this help message"
     echo " -i|--install Install RCCL library (see --prefix argument below)"
@@ -56,7 +57,6 @@ function display_help()
     echo " --no_clean Don't delete files if they already exist"
     echo " --npkit-enable Compile with npkit enabled"
     echo " --openmp-test-enable Enable OpenMP in rccl unit tests"
-    echo " --roctx-enable Compile with roctx enabled (example usage: rocprof --roctx-trace ./rccl-program)"
     echo " -p|--package_build Build RCCL package"
     echo " --prefix Specify custom directory to install RCCL to (default: \`/opt/rocm\`)"
     echo " --rm-legacy-include-dir Remove legacy include dir Packaging added for file/folder reorg backward compatibility"
@@ -97,6 +97,7 @@ while true; do
         --disable-colltrace) collective_trace=false; shift ;;
         --disable-msccl-kernel) msccl_kernel_enabled=false; shift ;;
         --disable-mscclpp) mscclpp_enabled=false; shift ;;
+        --disable-roctx) roctx_enabled=false; shift ;;
         -f | --fast) build_local_gpu_only=true; collective_trace=false; msccl_kernel_enabled=false; shift ;;
         -h | --help) display_help; exit 0 ;;
         -i | --install) install_library=true; shift ;;
@@ -106,7 +107,6 @@ while true; do
         --no_clean) clean_build=false; shift ;;
         --npkit-enable) npkit_enabled=true; shift ;;
         --openmp-test-enable) openmp_test_enabled=true; shift ;;
-        --roctx-enable) roctx_enabled=true; shift ;;
         -p | --package_build) build_package=true; shift ;;
         --prefix) install_library=true; install_prefix=${2}; shift 2 ;;
         --rm-legacy-include-dir) build_freorg_bkwdcomp=false; shift ;;
@@ -251,9 +251,9 @@ if [[ "${install_library}" == true ]]; then
     cmake_common_options="${cmake_common_options} -DCMAKE_INSTALL_PREFIX=${install_prefix}"
 fi
 
-# Enable ROCTX
-if [[ "${roctx_enabled}" == true ]]; then
-    cmake_common_options="${cmake_common_options} -DROCTX=ON"
+# Disable ROCTX
+if [[ "${roctx_enabled}" == false ]]; then
+    cmake_common_options="${cmake_common_options} -DROCTX=OFF"
 fi
 
 # Enable OpenMP in unit tests
diff --git a/src/include/roctx.h b/src/include/roctx.h
index 1bb76be196..2ffafcb9ef 100644
--- a/src/include/roctx.h
+++ b/src/include/roctx.h
@@ -1,5 +1,5 @@
 /*************************************************************************
- * Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
  *
  * See LICENSE.txt for license information
  ************************************************************************/
@@ -11,7 +11,7 @@
 #include
 #include
 
-#ifndef ROCTX_NO_IMPL
+#ifdef ROCTX_ENABLE
 #include
 #endif
 #include "nvtx3/nvtx3.hpp"
@@ -126,40 +126,24 @@ public:
    * 'numEntries', and 'schemaName'
    */
   explicit roctx_scoped_range_in(const nvtxPayloadSchemaEntry_t* schema, const nvtxPayloadData_t* data,
-    const size_t numEntries, const char* schemaName) noexcept
-  {
-#ifndef ROCTX_NO_IMPL
-    roctxAlloc(&payloadInfo, numEntries);
-    extractPayloadInfo(schema, data, numEntries, schemaName, &payloadInfo);
-    roctxRangePushA(payloadInfo.message);
-#endif
-  }
+    const size_t numEntries, const char* schemaName) noexcept;
 
   /**
    * Construct a 'roctx_scoped_range_in' with the specified 'message'
    */
-  explicit roctx_scoped_range_in(const char* message) noexcept
-  {
-#ifndef ROCTX_NO_IMPL
-    roctxRangePushA(message);
-#endif
-  }
+  explicit roctx_scoped_range_in(const char* message) noexcept;
 
   /**
    * Default constructor 'roctx_scoped_range_in'
    */
-  roctx_scoped_range_in() noexcept : roctx_scoped_range_in{""} {/*no impl*/}
+  roctx_scoped_range_in() noexcept;
 
   /**
    * Destroy the roctx_scoped_range_in, ending the ROCTX range event.
    */
-  ~roctx_scoped_range_in() noexcept
-  {
-#ifndef ROCTX_NO_IMPL
-    roctxRangePop();
-    roctxFree(&payloadInfo);
-#endif
-  }
+  ~roctx_scoped_range_in() noexcept;
+
 private:
-  roctxPayloadInfo payloadInfo;
+  // Value-initialized: the destructor calls roctxFree() even for ranges built without a payload
+  roctxPayloadInfo payloadInfo{};
 };
diff --git a/src/misc/roctx.cc b/src/misc/roctx.cc
index d3e5deef60..9c34b4ccf5 100644
--- a/src/misc/roctx.cc
+++ b/src/misc/roctx.cc
@@ -1,10 +1,12 @@
 /*************************************************************************
- * Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
  *
  * See LICENSE.txt for license information
  ************************************************************************/
 
 #include "roctx.h"
+#include "param.h"
+#include "debug.h"
 
 std::map nvtxToRoctx {
   {NVTX_PAYLOAD_ENTRY_TYPE_INT, ROCTX_PAYLOAD_ENTRY_TYPE_INT},
@@ -15,21 +17,17 @@ const char* roctxEntryTypeStr[ROCTX_PAYLOAD_NUM_ENTRY_TYPES] = {"ROCTX_PAYLOAD_E
 const char* ncclRedOpStr[ncclNumDevRedOps] = { "Sum", "Prod", "MinMax", "PreMulSum", "SumPostDiv" };
 
 void roctxAlloc(roctxPayloadInfo_t payloadInfo, const size_t numEntries) {
-#ifndef ROCTX_NO_IMPL
   // Allocate enough memory for numEntries in payloadEntries
   payloadInfo->payloadEntries = (roctxPayloadSchemaEntryInfo*)malloc(numEntries * sizeof(roctxPayloadSchemaEntryInfo));
 
   // Allocate memory for the message that will be constructed
   payloadInfo->message = (char*)malloc(MAX_MESSAGE_LENGTH * sizeof(char));
-#endif
 }
 
 void roctxFree(roctxPayloadInfo_t payloadInfo) {
-#ifndef ROCTX_NO_IMPL
   // Free all the dynamically allocated resources by roctx
   if (payloadInfo->payloadEntries) free(payloadInfo->payloadEntries);
   if (payloadInfo->message) free((void*)payloadInfo->message);
-#endif
 }
 
 void extractPayloadInfo(const nvtxPayloadSchemaEntry_t* schema, const nvtxPayloadData_t* data, const size_t numEntries,
@@ -96,4 +94,48 @@ void stringify(roctxPayloadInfo_t payloadInfo) {
   }
 
   snprintf(payloadInfo->message + offset, MAX_MESSAGE_LENGTH - offset, "}");
-}
\ No newline at end of file
+}
+
+// Runtime switch for ROCTX range logging; defaults to 0 (disabled).
+// NOTE(review): presumably read from the RCCL_LOG_ROCTX environment variable - confirm against param.h
+RCCL_PARAM(LogRoctx, "LOG_ROCTX", 0);
+
+// Pushes a ROCTX range labelled with payloadInfo.message built from the NVTX payload, if LOG_ROCTX is set.
+roctx_scoped_range_in::roctx_scoped_range_in(const nvtxPayloadSchemaEntry_t* schema, const nvtxPayloadData_t* data,
+  const size_t numEntries, const char* schemaName) noexcept {
+  if (rcclParamLogRoctx()) {
+    roctxAlloc(&payloadInfo, numEntries);
+    extractPayloadInfo(schema, data, numEntries, schemaName, &payloadInfo);
+#ifdef ROCTX_ENABLE
+    roctxRangePushA(payloadInfo.message);
+#else
+    WARN("ROCTX_ENABLE is not defined. Please rebuild with -DROCTX=ON (requires the roctx64 library)");
+#endif
+  }
+}
+
+// Pushes a ROCTX range labelled 'message', if LOG_ROCTX is set.
+roctx_scoped_range_in::roctx_scoped_range_in(const char* message) noexcept {
+  if (rcclParamLogRoctx()) {
+#ifdef ROCTX_ENABLE
+    roctxRangePushA(message);
+#else
+    WARN("ROCTX_ENABLE is not defined. Please rebuild with -DROCTX=ON (requires the roctx64 library)");
+#endif
+  }
+}
+
+// Default range: delegates to the message constructor with an empty label.
+roctx_scoped_range_in::roctx_scoped_range_in() noexcept : roctx_scoped_range_in{""} {/*no impl*/}
+
+// Pops the ROCTX range and releases any payload buffers, if LOG_ROCTX is set.
+roctx_scoped_range_in::~roctx_scoped_range_in() noexcept {
+  if (rcclParamLogRoctx()) {
+#ifdef ROCTX_ENABLE
+    roctxRangePop();
+#else
+    WARN("ROCTX_ENABLE is not defined. Please rebuild with -DROCTX=ON (requires the roctx64 library)");
+#endif
+    roctxFree(&payloadInfo);
+  }
+}