From d5bcb63263fcc6b63464eaeb65bd9229ce659424 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Thu, 12 Sep 2024 00:46:07 -0500 Subject: [PATCH] rocprofv3 Kokkos-Tools Support (#1058) --- source/bin/rocprofv3.py | 16 ++ source/lib/CMakeLists.txt | 4 +- .../lib/rocprofiler-sdk-tool/CMakeLists.txt | 4 +- .../kokkosp/CMakeLists.txt | 29 ++ .../rocprofiler-sdk-tool/kokkosp/kokkosp.cpp | 270 ++++++++++++++++++ .../plugins/CMakeLists.txt | 1 - 6 files changed, 319 insertions(+), 5 deletions(-) create mode 100644 source/lib/rocprofiler-sdk-tool/kokkosp/CMakeLists.txt create mode 100644 source/lib/rocprofiler-sdk-tool/kokkosp/kokkosp.cpp delete mode 100644 source/lib/rocprofiler-sdk-tool/plugins/CMakeLists.txt diff --git a/source/bin/rocprofv3.py b/source/bin/rocprofv3.py index bc0146b243..fa5e257578 100755 --- a/source/bin/rocprofv3.py +++ b/source/bin/rocprofv3.py @@ -188,6 +188,11 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins "--rccl-trace", help="For collecting RCCL Traces", ) + add_parser_bool_argument( + basic_tracing_options, + "--kokkos-trace", + help="Enable built-in Kokkos Tools support (implies --marker-trace and --kernel-rename)", + ) extended_tracing_options = parser.add_argument_group("Granular tracing options") @@ -611,6 +616,9 @@ def run(app_args, args, **kwargs): ROCPROF_TOOL_LIBRARY = f"{ROCM_DIR}/lib/rocprofiler-sdk/librocprofiler-sdk-tool.so" ROCPROF_SDK_LIBRARY = f"{ROCM_DIR}/lib/librocprofiler-sdk.so" ROCPROF_ROCTX_LIBRARY = f"{ROCM_DIR}/lib/librocprofiler-sdk-roctx.so" + ROCPROF_KOKKOSP_LIBRARY = ( + f"{ROCM_DIR}/lib/rocprofiler-sdk/librocprofiler-sdk-tool-kokkosp.so" + ) prepend_preload = [itr for itr in args.preload if itr] append_preload = [ROCPROF_TOOL_LIBRARY, ROCPROF_SDK_LIBRARY] @@ -651,6 +659,14 @@ def run(app_args, args, **kwargs): "ROCPROF_OUTPUT_FORMAT", ",".join(args.output_format), append=True, join_char="," ) + if args.kokkos_trace: + update_env("KOKKOS_TOOLS_LIBS", ROCPROF_KOKKOSP_LIBRARY, 
append=True) + for itr in ( + "marker_trace", + "kernel_rename", + ): + setattr(args, itr, True) + if args.sys_trace: for itr in ( "hip_trace", diff --git a/source/lib/CMakeLists.txt b/source/lib/CMakeLists.txt index 0952ec7387..9f9992c419 100644 --- a/source/lib/CMakeLists.txt +++ b/source/lib/CMakeLists.txt @@ -6,9 +6,9 @@ add_subdirectory(common) add_subdirectory(rocprofiler-sdk) set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME "tools") -add_subdirectory(rocprofiler-sdk-tool) -add_subdirectory(rocprofiler-sdk-roctx) add_subdirectory(rocprofiler-sdk-codeobj) +add_subdirectory(rocprofiler-sdk-roctx) +add_subdirectory(rocprofiler-sdk-tool) if(ROCPROFILER_BUILD_TESTS) add_subdirectory(tests) diff --git a/source/lib/rocprofiler-sdk-tool/CMakeLists.txt b/source/lib/rocprofiler-sdk-tool/CMakeLists.txt index cb61d3ed44..77c4f113ba 100644 --- a/source/lib/rocprofiler-sdk-tool/CMakeLists.txt +++ b/source/lib/rocprofiler-sdk-tool/CMakeLists.txt @@ -39,8 +39,6 @@ set(TOOL_SOURCES add_library(rocprofiler-sdk-tool SHARED) target_sources(rocprofiler-sdk-tool PRIVATE ${TOOL_SOURCES} ${TOOL_HEADERS}) -add_subdirectory(plugins) - target_link_libraries( rocprofiler-sdk-tool PRIVATE rocprofiler-sdk::rocprofiler-shared-library @@ -81,3 +79,5 @@ install( DESTINATION ${CMAKE_INSTALL_LIBDIR}/rocprofiler-sdk COMPONENT tools EXPORT rocprofiler-sdk-tool-targets) + +add_subdirectory(kokkosp) diff --git a/source/lib/rocprofiler-sdk-tool/kokkosp/CMakeLists.txt b/source/lib/rocprofiler-sdk-tool/kokkosp/CMakeLists.txt new file mode 100644 index 0000000000..6a1f7763af --- /dev/null +++ b/source/lib/rocprofiler-sdk-tool/kokkosp/CMakeLists.txt @@ -0,0 +1,29 @@ +# +# Tool library used by rocprofiler +# + +rocprofiler_activate_clang_tidy() + +set(KOKKOSP_HEADERS) +set(KOKKOSP_SOURCES kokkosp.cpp) + +add_library(rocprofiler-sdk-tool-kokkosp SHARED) +target_sources(rocprofiler-sdk-tool-kokkosp PRIVATE ${KOKKOSP_SOURCES} ${KOKKOSP_HEADERS}) + +target_link_libraries( + rocprofiler-sdk-tool-kokkosp + PRIVATE 
rocprofiler-sdk-roctx::rocprofiler-sdk-roctx-shared-library + rocprofiler-sdk::rocprofiler-headers rocprofiler-sdk::rocprofiler-glog) + +set_target_properties( + rocprofiler-sdk-tool-kokkosp + PROPERTIES LIBRARY_OUTPUT_DIRECTORY + ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}/rocprofiler-sdk + BUILD_RPATH "\$ORIGIN:\$ORIGIN/.." + INSTALL_RPATH "\$ORIGIN:\$ORIGIN/..") + +install( + TARGETS rocprofiler-sdk-tool-kokkosp + DESTINATION ${CMAKE_INSTALL_LIBDIR}/rocprofiler-sdk + COMPONENT tools + EXPORT rocprofiler-sdk-tool-targets) diff --git a/source/lib/rocprofiler-sdk-tool/kokkosp/kokkosp.cpp b/source/lib/rocprofiler-sdk-tool/kokkosp/kokkosp.cpp new file mode 100644 index 0000000000..386fef75d1 --- /dev/null +++ b/source/lib/rocprofiler-sdk-tool/kokkosp/kokkosp.cpp @@ -0,0 +1,270 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+
+#include "lib/common/environment.hpp"
+
+#include <rocprofiler-sdk-roctx/roctx.h>
+
+#include <cstdint>
+#include <iostream>
+#include <limits>
+#include <string>
+#include <vector>
+
+extern "C" {
+// Settings record written by kokkosp_request_tool_settings(); only the first flag is set here.
+struct Kokkos_Tools_ToolSettings
+{
+    bool requires_global_fencing;
+    bool padding[255];
+};
+
+// Device descriptor received by kokkosp_init_library(); this connector never reads it.
+struct Kokkos_Profiling_KokkosPDeviceInfo
+{
+    size_t deviceID;
+};
+}
+
+namespace
+{
+// Sentinel stored in Section::id while no ROCTx range is open for that section.
+constexpr auto invalid_range_id = std::numeric_limits<roctx_range_id_t>::max();
+
+// One profile section (label + open range id); plain aggregate so Section{label, id} stays valid.
+struct Section
+{
+    std::string      label = {};
+    roctx_range_id_t id    = invalid_range_id;
+};
+
+bool tool_globfences  = false;  // KOKKOS_TOOLS_GLOBALFENCES, read in kokkosp_init_library
+auto kokkosp_sections = std::vector<Section>{};  // indexed by the secid handed out on creation
+}  // namespace
+
+extern "C" {
+void
+kokkosp_request_tool_settings(const uint32_t, Kokkos_Tools_ToolSettings* settings) ROCTX_PUBLIC_API;
+
+void
+kokkosp_init_library(const int /*loadSeq*/,
+                     const uint64_t /*interfaceVer*/,
+                     const uint32_t /*devInfoCount*/,
+                     Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/) ROCTX_PUBLIC_API;
+
+void
+kokkosp_finalize_library() ROCTX_PUBLIC_API;
+
+void
+kokkosp_begin_parallel_for(const char* /*name*/,
+                           const uint32_t /*devID*/,
+                           uint64_t* /*kID*/) ROCTX_PUBLIC_API;
+void
+kokkosp_end_parallel_for(const uint64_t /*kID*/) ROCTX_PUBLIC_API;
+
+void
+kokkosp_begin_parallel_scan(const char* /*name*/,
+                            const uint32_t /*devID*/,
+                            uint64_t* /*kID*/) ROCTX_PUBLIC_API;
+void
+kokkosp_end_parallel_scan(const uint64_t /*kID*/) ROCTX_PUBLIC_API;
+
+void
+kokkosp_begin_parallel_reduce(const char* /*name*/,
+                              const uint32_t /*devID*/,
+                              uint64_t* /*kID*/) ROCTX_PUBLIC_API;
+
+void
+kokkosp_end_parallel_reduce(const uint64_t /*kID*/) ROCTX_PUBLIC_API;
+
+void
+kokkosp_push_profile_region(const char* /*name*/) ROCTX_PUBLIC_API;
+
+void
+kokkosp_pop_profile_region() ROCTX_PUBLIC_API;
+
+void
+kokkosp_create_profile_section(const char* /*name*/, uint32_t* /*secid*/) ROCTX_PUBLIC_API;
+
+void
+kokkosp_start_profile_section(const uint32_t /*secid*/) ROCTX_PUBLIC_API;
+
+void
+kokkosp_stop_profile_section(const uint32_t /*secid*/) ROCTX_PUBLIC_API;
+
+void
+kokkosp_destroy_profile_section(const uint32_t /*secid*/) ROCTX_PUBLIC_API;
+
+void
+kokkosp_profile_event(const char* /*name*/) ROCTX_PUBLIC_API;
+
+void
+kokkosp_begin_fence(const char* /*name*/,
+                    const uint32_t /*devID*/,
+                    uint64_t* /*fID*/) ROCTX_PUBLIC_API;
+
+void
+kokkosp_end_fence(const uint64_t /*fID*/) ROCTX_PUBLIC_API;
+}
+
+//
+// IMPLEMENTATION
+//
+extern "C" {
+// Copies the KOKKOS_TOOLS_GLOBALFENCES choice into the settings record supplied by the caller.
+void
+kokkosp_request_tool_settings(const uint32_t, Kokkos_Tools_ToolSettings* settings)
+{
+    settings->requires_global_fencing = tool_globfences;
+}
+
+// Reads KOKKOS_TOOLS_GLOBALFENCES, prints the connector banner and emits an initialization mark.
+void
+kokkosp_init_library(const int      loadSeq,
+                     const uint64_t interfaceVer,
+                     const uint32_t /*devInfoCount*/,
+                     Kokkos_Profiling_KokkosPDeviceInfo* /*deviceInfo*/)
+{
+    tool_globfences = ::rocprofiler::common::get_env("KOKKOS_TOOLS_GLOBALFENCES", false);
+
+    std::cout << "-----------------------------------------------------------\n"
+              << "KokkosP: rocprofv3 Connector (sequence is " << loadSeq
+              << ", version: " << interfaceVer << ")\n"
+              << "-----------------------------------------------------------\n";
+
+    roctxMark("Kokkos::Initialization Complete");
+}
+
+// Prints the finalization banner and emits a matching ROCTx mark.
+void
+kokkosp_finalize_library()
+{
+    std::cout << R"(
+-----------------------------------------------------------
+KokkosP: Finalization of rocprofv3 Connector. Complete.
+-----------------------------------------------------------
+)";
+
+    roctxMark("Kokkos::Finalization Complete");
+}
+
+void
+kokkosp_begin_parallel_for(const char* name, const uint32_t /*devID*/, uint64_t* /*kID*/)
+{
+    roctxRangePush(name);  // push a range labelled with the name passed in; kID is not written
+}
+
+void
+kokkosp_end_parallel_for(const uint64_t /*kID*/)
+{
+    roctxRangePop();  // pairs with the push in kokkosp_begin_parallel_for
+}
+
+void
+kokkosp_begin_parallel_scan(const char* name, const uint32_t /*devID*/, uint64_t* /*kID*/)
+{
+    roctxRangePush(name);  // push a range labelled with the name passed in; kID is not written
+}
+
+void
+kokkosp_end_parallel_scan(const uint64_t /*kID*/)
+{
+    roctxRangePop();  // pairs with the push in kokkosp_begin_parallel_scan
+}
+
+void
+kokkosp_begin_parallel_reduce(const char* name, const uint32_t /*devID*/, uint64_t* /*kID*/)
+{
+    roctxRangePush(name);  // push a range labelled with the name passed in; kID is not written
+}
+
+void
+kokkosp_end_parallel_reduce(const uint64_t /*kID*/)
+{
+    roctxRangePop();  // pairs with the push in kokkosp_begin_parallel_reduce
+}
+
+void
+kokkosp_push_profile_region(const char* name)
+{
+    roctxRangePush(name);  // user region maps directly onto a pushed ROCTx range
+}
+
+void
+kokkosp_pop_profile_region()
+{
+    roctxRangePop();  // pairs with the push in kokkosp_push_profile_region
+}
+
+// Registers a new section labelled `name` and returns its index through `secid`.
+void
+kokkosp_create_profile_section(const char* name, uint32_t* secid)
+{
+    *secid = static_cast<uint32_t>(kokkosp_sections.size());
+    // std::string must not be built from a null pointer, so fall back to an empty label
+    kokkosp_sections.emplace_back(Section{name ? name : "", invalid_range_id});
+}
+
+// Opens a ROCTx range labelled with the section's name; unknown section ids are ignored.
+void
+kokkosp_start_profile_section(const uint32_t secid)
+{
+    if(secid >= kokkosp_sections.size()) return;  // never index past the registered sections
+    auto& section = kokkosp_sections[secid];
+    section.id    = roctxRangeStart(section.label.c_str());
+}
+
+// Closes the range opened by the matching start call; a stop without a start is a no-op.
+void
+kokkosp_stop_profile_section(const uint32_t secid)
+{
+    if(secid >= kokkosp_sections.size()) return;
+    auto& section = kokkosp_sections[secid];
+    if(section.id == invalid_range_id) return;  // section was never started or is already stopped
+    roctxRangeStop(section.id);
+    section.id = invalid_range_id;
+}
+
+void
+kokkosp_destroy_profile_section(const uint32_t)
+{
+    // do nothing: ids are vector indices, so erasing an entry would shift every later section
+}
+
+void
+kokkosp_profile_event(const char* name)
+{
+    roctxMark(name);  // instantaneous event, no range is opened
+}
+
+void
+kokkosp_begin_fence(const char* name, const uint32_t /*devID*/, uint64_t* fID)
+{
+    *fID = roctxRangeStart(name);  // the range id doubles as the fence handle given back via fID
+}
+
+void
+kokkosp_end_fence(const uint64_t fID)
+{
+    roctxRangeStop(fID);  // fID is the range id stored by kokkosp_begin_fence
+}
+}
diff --git a/source/lib/rocprofiler-sdk-tool/plugins/CMakeLists.txt b/source/lib/rocprofiler-sdk-tool/plugins/CMakeLists.txt
deleted file mode 100644
index 8b13789179..0000000000
--- a/source/lib/rocprofiler-sdk-tool/plugins/CMakeLists.txt
+++ /dev/null
@@ -1 +0,0 @@
-