Package RCCL headers so that RCCL support can be built without installed RCCL headers (#1075)

- In ROCm CI, rocprofiler-sdk gets built before RCCL is installed; packaging the headers is a workaround for this issue.

[ROCm/rocprofiler-sdk commit: 8c1382fceb]
Этот коммит содержится в:
Jonathan R. Madsen
2024-09-12 18:24:50 -05:00
коммит произвёл GitHub
родитель 3a5154c5ff
Коммит fcd6cc45bd
16 изменённых файлов: 1235 добавлений и 102 удалений
+7
Просмотреть файл
@@ -99,6 +99,13 @@ else()
add_library(@PACKAGE_NAME@::@PACKAGE_NAME@-external-nolink INTERFACE IMPORTED)
# if rccl not found or <rccl/amd_detail/api_trace.h> not found when rocprofiler-sdk
# was built, use the packaged rccl.h and api_trace.h
if(NOT @rccl_FOUND@ OR NOT @rccl_API_TRACE_FOUND@)
target_compile_definitions(@PACKAGE_NAME@::@PACKAGE_NAME@-external-nolink
INTERFACE ROCPROFILER_SDK_USE_SYSTEM_RCCL=0)
endif()
include("${@PACKAGE_NAME@_CMAKE_DIR}/@PACKAGE_NAME@-targets.cmake")
@PROJECT_NAME@_config_nolink_target(@PACKAGE_NAME@::@PACKAGE_NAME@-external-nolink hip::host)
+12 -2
Просмотреть файл
@@ -301,9 +301,9 @@ target_link_libraries(rocprofiler-otf2 INTERFACE otf2::otf2)
# RCCL
#
# ----------------------------------------------------------------------------------------#
find_package(
rccl
REQUIRED
CONFIG
HINTS
${rocm_version_DIR}
@@ -314,4 +314,14 @@ find_package(
PATH_SUFFIXES
lib/cmake/rccl)
rocprofiler_config_nolink_target(rocprofiler-rccl-nolink rccl::rccl)
if(rccl_FOUND
AND rccl_INCLUDE_DIR
AND EXISTS "${rccl_INCLUDE_DIR}/rccl/amd_detail/api_trace.h")
set(rccl_API_TRACE_FOUND ON)
rocprofiler_config_nolink_target(rocprofiler-rccl-nolink rccl::rccl)
else()
set(rccl_API_TRACE_FOUND OFF)
target_compile_definitions(rocprofiler-rccl-nolink
INTERFACE ROCPROFILER_SDK_USE_SYSTEM_RCCL=0)
endif()
+1
Просмотреть файл
@@ -40,5 +40,6 @@ install(
add_subdirectory(hip)
add_subdirectory(hsa)
add_subdirectory(marker)
add_subdirectory(rccl)
add_subdirectory(cxx)
add_subdirectory(amd_detail)
+13
Просмотреть файл
@@ -0,0 +1,13 @@
#
# Install the public RCCL-related headers of rocprofiler-sdk
#

# Headers that live directly in this directory.
set(ROCPROFILER_RCCL_HEADER_FILES
    api_args.h
    api_id.h
    table_id.h)

# Place them under <includedir>/rocprofiler-sdk/rccl as part of the
# "development" install component.
install(
    FILES ${ROCPROFILER_RCCL_HEADER_FILES}
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/rccl
    COMPONENT development)

# Process the details/ subdirectory as well.
add_subdirectory(details)
+17 -1
Просмотреть файл
@@ -25,7 +25,23 @@
#include <rocprofiler-sdk/defines.h>
#include <rocprofiler-sdk/version.h>
#include <rccl/rccl.h>
/*
 * Choose which RCCL header to include.
 *
 * If ROCPROFILER_SDK_USE_SYSTEM_RCCL was not set beforehand, probe for
 * <rccl/rccl.h> with __has_include. The macro becomes 1 when the header is
 * found and 0 when it is missing or when __has_include is unavailable.
 */
#ifndef ROCPROFILER_SDK_USE_SYSTEM_RCCL
#    ifdef __has_include
#        if __has_include(<rccl/rccl.h>)
#            define ROCPROFILER_SDK_USE_SYSTEM_RCCL 1
#        endif
#    endif
/* nothing was detected above: fall back to the header shipped with rocprofiler-sdk */
#    ifndef ROCPROFILER_SDK_USE_SYSTEM_RCCL
#        define ROCPROFILER_SDK_USE_SYSTEM_RCCL 0
#    endif
#endif

/* positive value: system RCCL header; otherwise: packaged copy */
#if ROCPROFILER_SDK_USE_SYSTEM_RCCL > 0
#    include <rccl/rccl.h>
#else
#    include <rocprofiler-sdk/rccl/details/rccl.h>
#endif
#include <stdint.h>
+11
Просмотреть файл
@@ -0,0 +1,11 @@
#
# Install the RCCL headers packaged with rocprofiler-sdk (api_trace.h, rccl.h)
#

# Headers that live directly in this directory.
set(ROCPROFILER_RCCL_DETAILS_HEADER_FILES
    api_trace.h
    rccl.h)

# Place them under <includedir>/rocprofiler-sdk/rccl/details as part of the
# "development" install component.
install(
    FILES ${ROCPROFILER_RCCL_DETAILS_HEADER_FILES}
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/rccl/details
    COMPONENT development)
@@ -22,7 +22,23 @@
#pragma once
#include <rccl/rccl.h>
/*
 * Choose which RCCL header to include.
 *
 * If ROCPROFILER_SDK_USE_SYSTEM_RCCL was not set beforehand, probe for
 * <rccl/rccl.h> with __has_include. The macro becomes 1 when the header is
 * found and 0 when it is missing or when __has_include is unavailable.
 */
#ifndef ROCPROFILER_SDK_USE_SYSTEM_RCCL
#    ifdef __has_include
#        if __has_include(<rccl/rccl.h>)
#            define ROCPROFILER_SDK_USE_SYSTEM_RCCL 1
#        endif
#    endif
/* nothing was detected above: fall back to the header shipped with rocprofiler-sdk */
#    ifndef ROCPROFILER_SDK_USE_SYSTEM_RCCL
#        define ROCPROFILER_SDK_USE_SYSTEM_RCCL 0
#    endif
#endif

/* positive value: system RCCL header; otherwise: packaged copy */
#if ROCPROFILER_SDK_USE_SYSTEM_RCCL > 0
#    include <rccl/rccl.h>
#else
#    include <rocprofiler-sdk/rccl/details/rccl.h>
#endif
#include <stddef.h>
#include <stdint.h>
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+3 -5
Просмотреть файл
@@ -1444,15 +1444,13 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
&get_buffers().rccl_api_trace),
"buffer creation");
auto _status =
ROCPROFILER_CALL(
rocprofiler_configure_buffer_tracing_service(get_client_ctx(),
ROCPROFILER_BUFFER_TRACING_RCCL_API,
nullptr,
0,
get_buffers().rccl_api_trace);
if(_status != ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED)
ROCPROFILER_CALL(_status, "buffer tracing service for rccl api configure");
get_buffers().rccl_api_trace),
"buffer tracing service for rccl api configure");
}
if(tool::get_config().counter_collection)
+4 -4
Просмотреть файл
@@ -101,10 +101,10 @@ get_unsupported()
{
auto unsupported = std::unordered_set<rocprofiler_buffer_tracing_kind_t>{};
#if ROCPROFILER_SDK_RCCL_HAS_API_TRACE == 0
// Built against RCCL which does not support API tracing
unsupported.emplace(ROCPROFILER_BUFFER_TRACING_RCCL_API);
#endif
// #if ROCPROFILER_SDK_USE_SYSTEM_RCCL == 0
// // Built against RCCL which does not support API tracing
// unsupported.emplace(ROCPROFILER_BUFFER_TRACING_RCCL_API);
// #endif
return unsupported;
}
+4 -4
Просмотреть файл
@@ -98,10 +98,10 @@ get_unsupported()
{
auto unsupported = std::unordered_set<rocprofiler_callback_tracing_kind_t>{};
#if ROCPROFILER_SDK_RCCL_HAS_API_TRACE == 0
// Built against RCCL which does not support API tracing
unsupported.emplace(ROCPROFILER_CALLBACK_TRACING_RCCL_API);
#endif
// #if ROCPROFILER_SDK_USE_SYSTEM_RCCL == 0
// // Built against RCCL which does not support API tracing
// unsupported.emplace(ROCPROFILER_CALLBACK_TRACING_RCCL_API);
// #endif
return unsupported;
}
-2
Просмотреть файл
@@ -3,5 +3,3 @@ set(ROCPROFILER_LIB_RCCL_HEADERS defines.hpp rccl.hpp)
target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_RCCL_SOURCES}
${ROCPROFILER_LIB_RCCL_HEADERS})
add_subdirectory(details)
+42 -46
Просмотреть файл
@@ -22,13 +22,11 @@
#include "lib/rocprofiler-sdk/rccl/rccl.hpp"
#if ROCPROFILER_SDK_RCCL_HAS_API_TRACE > 0
#include "lib/common/abi.hpp"
#include "lib/common/defines.hpp"
# include "lib/common/abi.hpp"
# include "lib/common/defines.hpp"
# include <rocprofiler-sdk/rccl.h>
# include <rocprofiler-sdk/version.h>
#include <rocprofiler-sdk/rccl.h>
#include <rocprofiler-sdk/version.h>
namespace rocprofiler
{
@@ -37,46 +35,44 @@ namespace rccl
static_assert(RCCL_API_TRACE_VERSION_MAJOR == 0, "Major version updated for RCCL dispatch table");
static_assert(RCCL_API_TRACE_VERSION_PATCH == 0, "Patch version updated for RCCL dispatch table");
ROCP_SDK_ENFORCE_ABI_VERSIONING(rcclApiFuncTable, 37)
ROCP_SDK_ENFORCE_ABI_VERSIONING(::rcclApiFuncTable, 37)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclAllGather_fn, 0)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclAllReduce_fn, 1)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclAllToAll_fn, 2)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclAllToAllv_fn, 3)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclBroadcast_fn, 4)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclGather_fn, 5)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclReduce_fn, 6)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclReduceScatter_fn, 7)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclScatter_fn, 8)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclSend_fn, 9)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclRecv_fn, 10)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclRedOpCreatePreMulSum_fn, 11)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclRedOpDestroy_fn, 12)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclGroupStart_fn, 13)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclGroupEnd_fn, 14)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclGetVersion_fn, 15)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclGetUniqueId_fn, 16)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclCommInitRank_fn, 17)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclCommInitAll_fn, 18)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclCommInitRankConfig_fn, 19)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclCommFinalize_fn, 20)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclCommDestroy_fn, 21)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclCommAbort_fn, 22)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclCommSplit_fn, 23)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclGetErrorString_fn, 24)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclGetLastError_fn, 25)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclCommGetAsyncError_fn, 26)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclCommCount_fn, 27)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclCommCuDevice_fn, 28)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclCommUserRank_fn, 29)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclMemAlloc_fn, 30)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclMemFree_fn, 31)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, mscclLoadAlgo_fn, 32)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, mscclRunAlgo_fn, 33)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, mscclUnloadAlgo_fn, 34)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclCommRegister_fn, 35)
ROCP_SDK_ENFORCE_ABI(rcclApiFuncTable, ncclCommDeregister_fn, 36)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclAllGather_fn, 0)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclAllReduce_fn, 1)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclAllToAll_fn, 2)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclAllToAllv_fn, 3)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclBroadcast_fn, 4)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclGather_fn, 5)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclReduce_fn, 6)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclReduceScatter_fn, 7)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclScatter_fn, 8)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclSend_fn, 9)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclRecv_fn, 10)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclRedOpCreatePreMulSum_fn, 11)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclRedOpDestroy_fn, 12)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclGroupStart_fn, 13)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclGroupEnd_fn, 14)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclGetVersion_fn, 15)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclGetUniqueId_fn, 16)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclCommInitRank_fn, 17)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclCommInitAll_fn, 18)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclCommInitRankConfig_fn, 19)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclCommFinalize_fn, 20)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclCommDestroy_fn, 21)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclCommAbort_fn, 22)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclCommSplit_fn, 23)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclGetErrorString_fn, 24)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclGetLastError_fn, 25)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclCommGetAsyncError_fn, 26)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclCommCount_fn, 27)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclCommCuDevice_fn, 28)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclCommUserRank_fn, 29)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclMemAlloc_fn, 30)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclMemFree_fn, 31)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, mscclLoadAlgo_fn, 32)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, mscclRunAlgo_fn, 33)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, mscclUnloadAlgo_fn, 34)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclCommRegister_fn, 35)
ROCP_SDK_ENFORCE_ABI(::rcclApiFuncTable, ncclCommDeregister_fn, 36)
} // namespace rccl
} // namespace rocprofiler
#endif
-8
Просмотреть файл
@@ -1,8 +0,0 @@
#
#
#
set(ROCPROFILER_LIB_RCCL_DETAILS_SOURCES)
set(ROCPROFILER_LIB_RCCL_DETAILS_HEADERS api_trace.h)
target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_RCCL_DETAILS_SOURCES}
${ROCPROFILER_LIB_RCCL_DETAILS_HEADERS})
+8 -8
Просмотреть файл
@@ -22,28 +22,28 @@
#pragma once
#if !defined(ROCPROFILER_SDK_RCCL_HAS_API_TRACE)
#if !defined(ROCPROFILER_SDK_USE_SYSTEM_RCCL)
# if defined __has_include
# if __has_include(<rccl/amd_detail/api_trace.h>)
# define ROCPROFILER_SDK_RCCL_HAS_API_TRACE 1
# define ROCPROFILER_SDK_USE_SYSTEM_RCCL 1
# else
# define ROCPROFILER_SDK_RCCL_HAS_API_TRACE 0
# define ROCPROFILER_SDK_USE_SYSTEM_RCCL 0
# endif
# else
# define ROCPROFILER_SDK_RCCL_HAS_API_TRACE 0
# define ROCPROFILER_SDK_USE_SYSTEM_RCCL 0
# endif
#endif
#if ROCPROFILER_SDK_RCCL_HAS_API_TRACE > 0
#if ROCPROFILER_SDK_USE_SYSTEM_RCCL > 0
# include <rccl/amd_detail/api_trace.h>
# include <rccl/rccl.h>
#else
# include "lib/rocprofiler-sdk/rccl/details/api_trace.h"
# include <rocprofiler-sdk/rccl/details/api_trace.h>
# include <rocprofiler-sdk/rccl/details/rccl.h>
#endif
#include <rocprofiler-sdk/rocprofiler.h>
#include <rccl/rccl.h>
#include <cstdint>
#include <vector>
+15 -21
Просмотреть файл
@@ -1101,17 +1101,14 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
nullptr),
"scratch memory tracing service configure");
{
auto _status =
rocprofiler_configure_callback_tracing_service(rccl_api_callback_ctx,
ROCPROFILER_CALLBACK_TRACING_RCCL_API,
nullptr,
0,
tool_tracing_callback,
nullptr);
if(_status != ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED)
ROCPROFILER_CALL(_status, "rccl api callback tracing service configure");
}
ROCPROFILER_CALL(
rocprofiler_configure_callback_tracing_service(rccl_api_callback_ctx,
ROCPROFILER_CALLBACK_TRACING_RCCL_API,
nullptr,
0,
tool_tracing_callback,
nullptr),
"rccl api callback tracing service configure");
constexpr auto buffer_size = 8192;
constexpr auto watermark = 7936;
@@ -1297,16 +1294,13 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
corr_id_retire_buffer),
"buffer tracing service for memory copy configure");
{
auto _status =
rocprofiler_configure_buffer_tracing_service(rccl_api_buffered_ctx,
ROCPROFILER_BUFFER_TRACING_RCCL_API,
nullptr,
0,
rccl_api_buffered_buffer);
if(_status != ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED)
ROCPROFILER_CALL(_status, "buffer tracing service configure");
}
ROCPROFILER_CALL(
rocprofiler_configure_buffer_tracing_service(rccl_api_buffered_ctx,
ROCPROFILER_BUFFER_TRACING_RCCL_API,
nullptr,
0,
rccl_api_buffered_buffer),
"buffer tracing service for rccl api configure");
ROCPROFILER_CALL(
rocprofiler_configure_buffered_dispatch_profile_counting_service(