[Profiler] Enable ROCTX during build by default (#1506)

* Enable ROCTX during build by default

* Check for roctx support in cmake
このコミットが含まれているのは:
Bertan Dogancay
2025-01-29 11:29:46 -05:00
committed by GitHub
コミット 35fe9e06f3
4個のファイルの変更71行の追加40行の削除
+17 -5
ファイルの表示
@@ -29,7 +29,7 @@ option(ENABLE_MSCCL_KERNEL "Enable MSCCL while compiling"
option(ENABLE_MSCCLPP "Enable MSCCL++" ON)
option(ENABLE_IFC "Enable indirect function call" OFF)
option(INSTALL_DEPENDENCIES "Force install dependencies" OFF)
option(ROCTX "Enable ROCTX" OFF)
option(ROCTX "Enable ROCTX" ON)
option(PROFILE "Enable profiling" OFF)
option(TIMETRACE "Enable time-trace during compilation" OFF)
option(TRACE "Enable additional tracing" OFF)
@@ -320,6 +320,17 @@ if (ENABLE_MSCCLPP AND NOT(${HOST_OS_ID} STREQUAL "ubuntu" OR ${HOST_OS_ID} STRE
message(WARNING "MSCCL++ integration not supported on this OS (${HOST_OS_ID}); disabling MSCCL++ build")
endif()
# Check for ROCTX
# ROCTX is the user-facing option; ROCTX_ENABLE is the internal switch that the
# compile-definition and link steps test. Reset it before probing so that a
# stale or user-supplied -DROCTX_ENABLE=ON cache entry cannot bypass the
# library check (a normal variable takes precedence over a cache entry).
set(ROCTX_ENABLE OFF)
if(ROCTX)
  find_library(ROCTX_LIB NAMES roctx64)
  if(ROCTX_LIB)
    set(ROCTX_ENABLE ON)
    message(STATUS "ROCTX library found: ${ROCTX_LIB}")
  else()
    # Not fatal: the build continues without ROCTX support.
    message(WARNING "ROCTX library not found. Skipping ROCTX linking.")
  endif()
endif()
# Determine version from makefiles/version.mk and fill in templates
#==================================================================================================
## parse version from Makefile NCCL_MAJOR, NCCL_MINOR, NCCL_PATCH must exist
@@ -701,9 +712,10 @@ endif()
if(PROFILE)
target_compile_definitions(rccl PRIVATE ENABLE_PROFILING)
endif()
if(NOT ROCTX)
if(ROCTX_ENABLE)
target_compile_definitions(rccl PRIVATE ROCTX_ENABLE)
else()
target_compile_definitions(rccl PRIVATE NVTX_NO_IMPL)
target_compile_definitions(rccl PRIVATE ROCTX_NO_IMPL)
target_compile_definitions(rccl PRIVATE NVTX_DISABLE)
endif()
if(TRACE)
@@ -802,10 +814,10 @@ if (HAVE_BFD)
target_link_libraries(rccl PRIVATE iberty z)
endif()
endif()
if (ROCTX)
if (ROCTX_ENABLE)
# Link the exact library located by find_library() rather than a bare -l flag,
# so a roctx64 found outside the linker's default search path still links.
target_link_libraries(rccl PRIVATE "${ROCTX_LIB}")
endif()
target_link_libraries(rccl PRIVATE -fgpu-rdc) # Required when linking relocatable device code
target_link_libraries(rccl PRIVATE -fgpu-rdc) # Required when linking relocatable device code
target_link_libraries(rccl PRIVATE Threads::Threads)
target_link_libraries(rccl INTERFACE hip::host)
target_link_libraries(rccl PRIVATE hip::device)
+6 -6
ファイルの表示
@@ -28,7 +28,7 @@ mscclpp_enabled=true
num_parallel_jobs=$(nproc)
npkit_enabled=false
openmp_test_enabled=false
roctx_enabled=false
roctx_enabled=true
run_tests=false
run_tests_all=false
time_trace=false
@@ -47,6 +47,7 @@ function display_help()
echo " --disable-colltrace Build without collective trace"
echo " --disable-msccl-kernel Build without MSCCL kernels"
echo " --disable-mscclpp Build without MSCCL++ support"
echo " --disable-roctx Build without ROCTX logging"
echo " -f|--fast Quick-build RCCL (local gpu arch only, no backtrace, and collective trace support)"
echo " -h|--help Prints this help message"
echo " -i|--install Install RCCL library (see --prefix argument below)"
@@ -56,7 +57,6 @@ function display_help()
echo " --no_clean Don't delete files if they already exist"
echo " --npkit-enable Compile with npkit enabled"
echo " --openmp-test-enable Enable OpenMP in rccl unit tests"
echo " --roctx-enable Compile with roctx enabled (example usage: rocprof --roctx-trace ./rccl-program)"
echo " -p|--package_build Build RCCL package"
echo " --prefix Specify custom directory to install RCCL to (default: \`/opt/rocm\`)"
echo " --rm-legacy-include-dir Remove legacy include dir Packaging added for file/folder reorg backward compatibility"
@@ -97,6 +97,7 @@ while true; do
--disable-colltrace) collective_trace=false; shift ;;
--disable-msccl-kernel) msccl_kernel_enabled=false; shift ;;
--disable-mscclpp) mscclpp_enabled=false; shift ;;
--disable-roctx) roctx_enabled=false; shift ;;
-f | --fast) build_local_gpu_only=true; collective_trace=false; msccl_kernel_enabled=false; shift ;;
-h | --help) display_help; exit 0 ;;
-i | --install) install_library=true; shift ;;
@@ -106,7 +107,6 @@ while true; do
--no_clean) clean_build=false; shift ;;
--npkit-enable) npkit_enabled=true; shift ;;
--openmp-test-enable) openmp_test_enabled=true; shift ;;
--roctx-enable) roctx_enabled=true; shift ;;
-p | --package_build) build_package=true; shift ;;
--prefix) install_library=true; install_prefix=${2}; shift 2 ;;
--rm-legacy-include-dir) build_freorg_bkwdcomp=false; shift ;;
@@ -251,9 +251,9 @@ if [[ "${install_library}" == true ]]; then
cmake_common_options="${cmake_common_options} -DCMAKE_INSTALL_PREFIX=${install_prefix}"
fi
# Enable ROCTX
if [[ "${roctx_enabled}" == true ]]; then
cmake_common_options="${cmake_common_options} -DROCTX=ON"
# Disable ROCTX
if [[ "${roctx_enabled}" == false ]]; then
cmake_common_options="${cmake_common_options} -DROCTX=OFF"
fi
# Enable OpenMP in unit tests
+7 -24
ファイルの表示
@@ -1,5 +1,5 @@
/*************************************************************************
* Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
@@ -11,7 +11,7 @@
#include <string.h>
#include <map>
#ifndef ROCTX_NO_IMPL
#ifdef ROCTX_ENABLE
#include <roctracer/roctx.h>
#endif
#include "nvtx3/nvtx3.hpp"
@@ -126,40 +126,23 @@ public:
* 'numEntries', and 'schemaName'
*/
explicit roctx_scoped_range_in(const nvtxPayloadSchemaEntry_t* schema, const nvtxPayloadData_t* data,
const size_t numEntries, const char* schemaName) noexcept
{
#ifndef ROCTX_NO_IMPL
roctxAlloc(&payloadInfo, numEntries);
extractPayloadInfo(schema, data, numEntries, schemaName, &payloadInfo);
roctxRangePushA(payloadInfo.message);
#endif
}
const size_t numEntries, const char* schemaName) noexcept;
/**
* Construct a 'roctx_scoped_range_in' with the specified 'message'
*/
explicit roctx_scoped_range_in(const char* message) noexcept
{
#ifndef ROCTX_NO_IMPL
roctxRangePushA(message);
#endif
}
explicit roctx_scoped_range_in(const char* message) noexcept;
/**
* Default constructor 'roctx_scoped_range_in'
*/
roctx_scoped_range_in() noexcept : roctx_scoped_range_in{""} {/*no impl*/}
roctx_scoped_range_in() noexcept;
/**
* Destroy the roctx_scoped_range_in, ending the ROCTX range event.
*/
~roctx_scoped_range_in() noexcept
{
#ifndef ROCTX_NO_IMPL
roctxRangePop();
roctxFree(&payloadInfo);
#endif
}
~roctx_scoped_range_in() noexcept;
private:
roctxPayloadInfo payloadInfo;
};
+41 -5
ファイルの表示
@@ -1,10 +1,12 @@
/*************************************************************************
* Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/
#include "roctx.h"
#include "param.h"
#include "debug.h"
std::map<uint64_t, roctxPayloadEntryType> nvtxToRoctx {
{NVTX_PAYLOAD_ENTRY_TYPE_INT, ROCTX_PAYLOAD_ENTRY_TYPE_INT},
@@ -15,21 +17,17 @@ const char* roctxEntryTypeStr[ROCTX_PAYLOAD_NUM_ENTRY_TYPES] = {"ROCTX_PAYLOAD_E
const char* ncclRedOpStr[ncclNumDevRedOps] = { "Sum", "Prod", "MinMax", "PreMulSum", "SumPostDiv" };
/**
 * Allocate the heap buffers owned by 'payloadInfo': an array of 'numEntries'
 * roctxPayloadSchemaEntryInfo elements and a message buffer of
 * MAX_MESSAGE_LENGTH chars. The malloc results are stored without being
 * checked, so either pointer may be NULL on allocation failure.
 * The whole body is compiled out when ROCTX_NO_IMPL is defined.
 * Buffers are released by roctxFree().
 */
void roctxAlloc(roctxPayloadInfo_t payloadInfo, const size_t numEntries) {
#ifndef ROCTX_NO_IMPL
// Allocate enough memory for numEntries in payloadEntries
payloadInfo->payloadEntries = (roctxPayloadSchemaEntryInfo*)malloc(numEntries * sizeof(roctxPayloadSchemaEntryInfo));
// Allocate memory for the message that will be constructed
payloadInfo->message = (char*)malloc(MAX_MESSAGE_LENGTH * sizeof(char));
#endif
}
/**
 * Release the buffers that roctxAlloc() attached to 'payloadInfo'.
 * Each pointer is freed only if it is non-null; the pointers are not reset
 * afterwards, so the caller must not call this twice on the same object.
 * NOTE(review): both pointers are read unconditionally, so 'payloadInfo' must
 * hold valid or null pointers on entry — confirm every caller initializes it.
 * The whole body is compiled out when ROCTX_NO_IMPL is defined.
 */
void roctxFree(roctxPayloadInfo_t payloadInfo) {
#ifndef ROCTX_NO_IMPL
// Free all the dynamically allocated resources by roctx
if (payloadInfo->payloadEntries) free(payloadInfo->payloadEntries);
if (payloadInfo->message) free((void*)payloadInfo->message);
#endif
}
void extractPayloadInfo(const nvtxPayloadSchemaEntry_t* schema, const nvtxPayloadData_t* data, const size_t numEntries,
@@ -96,4 +94,42 @@ void stringify(roctxPayloadInfo_t payloadInfo) {
}
snprintf(payloadInfo->message + offset, MAX_MESSAGE_LENGTH - offset, "}");
}
// Runtime switch for ROCTX logging, read through rcclParamLogRoctx(); the
// roctx_scoped_range_in constructors and destructor gate their ROCTX calls on it.
// NOTE(review): the third argument (0) is presumably the default value, and
// RCCL_PARAM presumably maps "LOG_ROCTX" to an RCCL_-prefixed environment
// variable — confirm against param.h.
RCCL_PARAM(LogRoctx, "LOG_ROCTX", 0);
/**
 * Construct a 'roctx_scoped_range_in' from an NVTX payload description.
 *
 * Nothing happens unless rcclParamLogRoctx() is non-zero. When it is, the
 * payload buffers are allocated with roctxAlloc() and handed to
 * extractPayloadInfo() (expected to fill 'payloadInfo.message'). The message
 * is pushed as a ROCTX range only when the library was compiled with
 * ROCTX_ENABLE; otherwise a warning is logged.
 *
 * @param schema      NVTX payload schema entries
 * @param data        NVTX payload data
 * @param numEntries  number of entries in 'schema'
 * @param schemaName  name of the schema
 */
roctx_scoped_range_in::roctx_scoped_range_in(const nvtxPayloadSchemaEntry_t* schema, const nvtxPayloadData_t* data,
                                             const size_t numEntries, const char* schemaName) noexcept {
  if (rcclParamLogRoctx()) {
    roctxAlloc(&payloadInfo, numEntries);
    extractPayloadInfo(schema, data, numEntries, schemaName, &payloadInfo);
#ifdef ROCTX_ENABLE
    roctxRangePushA(payloadInfo.message);
#else
    // The user-facing CMake option is ROCTX; ROCTX_ENABLE is derived by the
    // build only when the roctx64 library is found, so point users at ROCTX.
    WARN("ROCTX_ENABLE is not defined. Please rebuild with -DROCTX=ON and make sure the roctx64 library can be found");
#endif
  }
}
/**
 * Construct a 'roctx_scoped_range_in' with the specified 'message'.
 *
 * The message is pushed as a ROCTX range only when rcclParamLogRoctx() is
 * non-zero and the library was compiled with ROCTX_ENABLE; when logging is
 * requested without ROCTX_ENABLE a warning is logged instead.
 *
 * @param message  label of the range
 */
roctx_scoped_range_in::roctx_scoped_range_in(const char* message) noexcept {
  // This constructor never calls roctxAlloc(), yet the destructor still hands
  // payloadInfo to roctxFree(), which tests and frees these pointers. Null
  // them so the destructor has nothing to free.
  payloadInfo.payloadEntries = nullptr;
  payloadInfo.message = nullptr;

  if (rcclParamLogRoctx()) {
#ifdef ROCTX_ENABLE
    roctxRangePushA(message);
#else
    // The user-facing CMake option is ROCTX; ROCTX_ENABLE is derived by the
    // build only when the roctx64 library is found, so point users at ROCTX.
    WARN("ROCTX_ENABLE is not defined. Please rebuild with -DROCTX=ON and make sure the roctx64 library can be found");
#endif
  }
}
/**
 * Default constructor: delegates to the message constructor with an empty
 * message string and has no body of its own.
 */
roctx_scoped_range_in::roctx_scoped_range_in() noexcept
  : roctx_scoped_range_in{""}
{
}
/**
 * Destroy the 'roctx_scoped_range_in'.
 *
 * Does nothing unless rcclParamLogRoctx() is non-zero. When it is, the ROCTX
 * range is popped if the library was compiled with ROCTX_ENABLE (otherwise a
 * warning is logged), and the payload buffers are released with roctxFree().
 */
roctx_scoped_range_in::~roctx_scoped_range_in() noexcept {
  if (rcclParamLogRoctx()) {
#ifdef ROCTX_ENABLE
    roctxRangePop();
#else
    // The user-facing CMake option is ROCTX; ROCTX_ENABLE is derived by the
    // build only when the roctx64 library is found, so point users at ROCTX.
    WARN("ROCTX_ENABLE is not defined. Please rebuild with -DROCTX=ON and make sure the roctx64 library can be found");
#endif
    // Runs in both build configurations: the buffers are allocated by the
    // schema constructor regardless of ROCTX_ENABLE.
    roctxFree(&payloadInfo);
  }
}