diff --git a/.github/workflows/rocprofiler-systems-ubuntu-jammy.yml b/.github/workflows/rocprofiler-systems-ubuntu-jammy.yml index 10d198ffb1..ee2ccfd81b 100644 --- a/.github/workflows/rocprofiler-systems-ubuntu-jammy.yml +++ b/.github/workflows/rocprofiler-systems-ubuntu-jammy.yml @@ -474,7 +474,7 @@ jobs: -DROCPROFSYS_USE_ROCM=OFF -DROCPROFSYS_USE_RCCL=OFF -DROCPROFSYS_MAX_THREADS=64 - -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode;jpegdecode;openmp-target" + -DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode;jpegdecode;openmp" -DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }} -- -LE "transpose|rccl|videodecode|jpegdecode|network" diff --git a/projects/rocprofiler-systems/cmake/Packages.cmake b/projects/rocprofiler-systems/cmake/Packages.cmake index fd9b302a0b..75c93858c0 100644 --- a/projects/rocprofiler-systems/cmake/Packages.cmake +++ b/projects/rocprofiler-systems/cmake/Packages.cmake @@ -630,12 +630,7 @@ set(TIMEMORY_QUIET_CONFIG ON CACHE BOOL "Make timemory configuration quieter") # timemory feature settings set(TIMEMORY_USE_GOTCHA ON CACHE BOOL "Enable GOTCHA support in timemory") set(TIMEMORY_USE_PERFETTO OFF CACHE BOOL "Disable perfetto support in timemory") -set(TIMEMORY_USE_OMPT - ${ROCPROFSYS_USE_OMPT} - CACHE BOOL - "Enable OMPT support in timemory" - FORCE -) +set(TIMEMORY_USE_OMPT OFF CACHE BOOL "Enable OMPT support in timemory" FORCE) set(TIMEMORY_USE_PAPI ${ROCPROFSYS_USE_PAPI} CACHE BOOL diff --git a/projects/rocprofiler-systems/examples/openmp/target/library.cpp b/projects/rocprofiler-systems/examples/openmp/target/library.cpp index b676678a1d..68ab67a74e 100644 --- a/projects/rocprofiler-systems/examples/openmp/target/library.cpp +++ b/projects/rocprofiler-systems/examples/openmp/target/library.cpp @@ -140,6 +140,12 @@ run_impl() int run() { +#if _OPENMP == 202011 + std::cout << "Compiler OpenMP version == 202011" << std::endl; +#elif _OPENMP == 201811 + std::cout << "Compiler OpenMP version == 201811" << std::endl; +#endif + #pragma omp parallel { run_impl(); diff --git a/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp b/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp index 7225a21b62..ce005f1d8b 100644 --- a/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp +++ b/projects/rocprofiler-systems/source/bin/rocprof-sys-sample/impl.cpp @@ -134,10 +134,6 @@ get_initial_environment() update_env(_env, "ROCPROFSYS_USE_SAMPLING", (_mode != "causal")); -#if defined(ROCPROFSYS_USE_OMPT) - if(!getenv("OMP_TOOL_LIBRARIES")) - update_env(_env, "OMP_TOOL_LIBRARIES", _dl_libpath, UPD_APPEND); -#endif return _env; } @@ -769,9 +765,6 @@ parse_args(int argc, char** argv, std::vector& _env) _update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); - if(_v.count("all") > 0 || _v.count("ompt") > 0) - update_env(_env, "OMP_TOOL_LIBRARIES", _dl_libpath, UPD_APPEND); - if(_v.count("all") > 0 || _v.count("kokkosp") > 0) update_env(_env, "KOKKOS_TOOLS_LIBS", _omni_libpath, UPD_APPEND); }); @@ -793,15 +786,6 @@ parse_args(int argc, char** argv, std::vector& _env) _update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); - // if(_v.count("all") > 0 || _v.count("rocprofiler") > 0) - // { - // remove_env(_env, "ROCP_TOOL_LIB"); - // remove_env(_env, "ROCP_HSA_INTERCEPT"); - // } - - if(_v.count("all") > 0 || _v.count("ompt") > 0) - remove_env(_env, "OMP_TOOL_LIBRARIES"); - if(_v.count("all") > 0 || _v.count("kokkosp") > 0) remove_env(_env, "KOKKOS_TOOLS_LIBS"); }); diff --git a/projects/rocprofiler-systems/source/lib/common/setup.hpp b/projects/rocprofiler-systems/source/lib/common/setup.hpp index 0a46176868..1fc213366a 100644 --- a/projects/rocprofiler-systems/source/lib/common/setup.hpp +++ b/projects/rocprofiler-systems/source/lib/common/setup.hpp @@ -109,25 +109,6 @@ get_environ(int _verbose, std::string _search_paths = {}, _omnilib = common::path::find_path(_omnilib, _verbose, _search_paths); _omnilib_dl = common::path::find_path(_omnilib_dl, _verbose, _search_paths); -#if defined(ROCPROFSYS_USE_OMPT) && ROCPROFSYS_USE_OMPT > 0 - if(get_env("ROCPROFSYS_USE_OMPT", true)) - { - std::string _omni_omp_libs = _omnilib_dl; - const char* _omp_libs = getenv("OMP_TOOL_LIBRARIES"); - int _override = 0; - if(_omp_libs != nullptr && - std::string_view{ _omp_libs }.find(_omnilib_dl) == std::string::npos) - { - _override = 1; - _omni_omp_libs = common::join(':', _omp_libs, _omnilib_dl); - } - ROCPROFSYS_SETUP_LOG(_verbose >= 2, "setting OMP_TOOL_LIBRARIES to '%s'\n", - _omni_omp_libs.c_str()); - _data.emplace_back( - env_config{ "OMP_TOOL_LIBRARIES", _omni_omp_libs.c_str(), _override }); - } -#endif - return _data; } diff --git a/projects/rocprofiler-systems/source/lib/core/argparse.cpp b/projects/rocprofiler-systems/source/lib/core/argparse.cpp index a8026c26f8..03e0b9f6ef 100644 --- a/projects/rocprofiler-systems/source/lib/core/argparse.cpp +++ b/projects/rocprofiler-systems/source/lib/core/argparse.cpp @@ -239,11 +239,6 @@ init_parser(parser_data& _data) auto _libexecpath = get_realpath(get_internal_script_path()); update_env(_data, "ROCPROFSYS_SCRIPT_PATH", _libexecpath, UPD_REPLACE); -#if defined(ROCPROFSYS_USE_OMPT) - if(!getenv("OMP_TOOL_LIBRARIES")) - update_env(_data, "OMP_TOOL_LIBRARIES", _data.dl_libpath, UPD_PREPEND); -#endif - return _data; } @@ -619,10 +614,6 @@ add_core_arguments(parser_t& _parser, parser_data& _data) _update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); - if(_v.count("all") > 0 || _v.count("ompt") > 0) - update_env(_data, "OMP_TOOL_LIBRARIES", _data.dl_libpath, - UPD_PREPEND); - if(_v.count("all") > 0 || _v.count("kokkosp") > 0) update_env(_data, "KOKKOS_TOOLS_LIBS", _data.omni_libpath, UPD_PREPEND); @@ -653,9 +644,6 @@ add_core_arguments(parser_t& _parser, parser_data& _data) _update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0); _update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0); - if(_v.count("all") > 0 || _v.count("ompt") > 0) - remove_env(_data, "OMP_TOOL_LIBRARIES"); - if(_v.count("all") > 0 || _v.count("kokkosp") > 0) remove_env(_data, "KOKKOS_TOOLS_LIBS"); }); diff --git a/projects/rocprofiler-systems/source/lib/core/categories.hpp b/projects/rocprofiler-systems/source/lib/core/categories.hpp index 4cb45df5eb..9bf9cb1187 100644 --- a/projects/rocprofiler-systems/source/lib/core/categories.hpp +++ b/projects/rocprofiler-systems/source/lib/core/categories.hpp @@ -105,6 +105,7 @@ ROCPROFSYS_DEFINE_CATEGORY(category, rocm_marker_api, ROCPROFSYS_CATEGORY_ROCM_M ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rocdecode_api, ROCPROFSYS_CATEGORY_ROCM_ROCDECODE_API, "rocm_rocdecode_api", "ROCm RocDecode API") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rocjpeg_api, ROCPROFSYS_CATEGORY_ROCM_ROCJPEG_API, "rocm_rocjpeg_api", "ROCm RocJPEG API") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rccl_api, ROCPROFSYS_CATEGORY_ROCM_RCCL_API, "rocm_rccl_api", "ROCm RCCL API") +ROCPROFSYS_DEFINE_CATEGORY(category, rocm_ompt_api, ROCPROFSYS_CATEGORY_ROCM_OMPT_API, "rocm_ompt_api", "ROCm OMPT API") ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi, ROCPROFSYS_CATEGORY_AMD_SMI, "amd_smi", "AMD-SMI data") ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_gfx_busy, ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_GFX, "device_busy_gfx", "Busy percentage of GFX engine on a GPU device") ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_umc_busy, ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_UMC, "device_busy_umc", "Busy percentage of UMC engin on a GPU device") @@ -177,6 +178,7 @@ using name = perfetto_category; ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_rocdecode_api), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_rocjpeg_api), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_rccl_api), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_ompt_api), \ ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi), \ ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_gfx_busy), \ ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_umc_busy), \ diff --git a/projects/rocprofiler-systems/source/lib/core/config.cpp b/projects/rocprofiler-systems/source/lib/core/config.cpp index b8afbd652d..cc9c19aba9 100644 --- a/projects/rocprofiler-systems/source/lib/core/config.cpp +++ b/projects/rocprofiler-systems/source/lib/core/config.cpp @@ -1930,12 +1930,8 @@ get_use_vaapi_tracing() bool get_use_ompt() { -#if defined(TIMEMORY_USE_OMPT) static auto _v = get_config()->find("ROCPROFSYS_USE_OMPT"); return static_cast&>(*_v->second).get(); -#else - return false; -#endif } bool diff --git a/projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.cpp b/projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.cpp index bfe786920b..463087b434 100644 --- a/projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.cpp +++ b/projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.cpp @@ -414,6 +414,7 @@ get_callback_domains() { // Argument tracing is supported in rocprofiler-sdk 0.6.0 and later supported.emplace(ROCPROFILER_CALLBACK_TRACING_RCCL_API); + supported.emplace(ROCPROFILER_CALLBACK_TRACING_OMPT); supported.emplace(ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API); } # endif @@ -436,6 +437,15 @@ get_callback_domains() _data.emplace(ROCPROFILER_CALLBACK_TRACING_RCCL_API); } +# if ROCPROFILER_VERSION >= 600 + if(config::get_use_ompt() && _version.formatted >= 600) + { + // Translate some configuration settings to rocprofiler domains + _data.emplace(ROCPROFILER_CALLBACK_TRACING_OMPT); + } +# endif + + // Check that the domains are valid const auto valid_choices = settings::instance()->at("ROCPROFSYS_ROCM_DOMAINS")->get_choices(); diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h b/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h index f5e6b1e3dd..d359fe7bbe 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h @@ -57,6 +57,7 @@ extern "C" ROCPROFSYS_CATEGORY_ROCM_ROCDECODE_API, ROCPROFSYS_CATEGORY_ROCM_ROCJPEG_API, ROCPROFSYS_CATEGORY_ROCM_RCCL_API, + ROCPROFSYS_CATEGORY_ROCM_OMPT_API, ROCPROFSYS_CATEGORY_AMD_SMI, ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_GFX, ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_UMC, diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp index aeff6cf976..05d182fa71 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp @@ -56,7 +56,6 @@ #include "library/components/pthread_gotcha.hpp" #include "library/components/vaapi_gotcha.hpp" #include "library/coverage.hpp" -#include "library/ompt.hpp" #include "library/process_sampler.hpp" #include "library/ptl.hpp" #include "library/rocprofiler-sdk.hpp" @@ -85,6 +84,7 @@ #if ROCPROFSYS_USE_ROCM > 0 # include +# include #endif #include @@ -602,12 +602,6 @@ rocprofsys_init_tooling_hidden(void) } } - if(get_use_ompt()) - { - ROCPROFSYS_VERBOSE_F(1, "Setting up OMPT...\n"); - ompt::setup(); - } - if(get_use_perfetto()) { ROCPROFSYS_VERBOSE_F(1, "Starting Perfetto...\n"); @@ -853,12 +847,6 @@ rocprofsys_finalize_hidden(void) component::vaapi_gotcha::shutdown(); } - if(get_use_ompt()) - { - ROCPROFSYS_VERBOSE_F(1, "Shutting down OMPT...\n"); - ompt::shutdown(); - } - #if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0 if(get_use_rocm()) { diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/CMakeLists.txt index 4bc4b29082..4ac5864543 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/CMakeLists.txt @@ -3,7 +3,6 @@ set(library_sources ${CMAKE_CURRENT_LIST_DIR}/coverage.cpp ${CMAKE_CURRENT_LIST_DIR}/cpu_freq.cpp ${CMAKE_CURRENT_LIST_DIR}/kokkosp.cpp - ${CMAKE_CURRENT_LIST_DIR}/ompt.cpp ${CMAKE_CURRENT_LIST_DIR}/perf.cpp ${CMAKE_CURRENT_LIST_DIR}/process_sampler.cpp ${CMAKE_CURRENT_LIST_DIR}/ptl.cpp @@ -17,7 +16,6 @@ set(library_sources set(library_headers ${CMAKE_CURRENT_LIST_DIR}/coverage.hpp ${CMAKE_CURRENT_LIST_DIR}/cpu_freq.hpp - ${CMAKE_CURRENT_LIST_DIR}/ompt.hpp ${CMAKE_CURRENT_LIST_DIR}/process_sampler.hpp ${CMAKE_CURRENT_LIST_DIR}/perf.hpp ${CMAKE_CURRENT_LIST_DIR}/ptl.hpp @@ -58,7 +56,6 @@ set(ndebug_sources ${CMAKE_CURRENT_LIST_DIR}/components/backtrace_metrics.cpp ${CMAKE_CURRENT_LIST_DIR}/kokkosp.cpp ${CMAKE_CURRENT_LIST_DIR}/amd_smi.cpp - ${CMAKE_CURRENT_LIST_DIR}/ompt.cpp ) set_source_files_properties( diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/ompt.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/ompt.cpp deleted file mode 100644 index ab5d756971..0000000000 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/ompt.cpp +++ /dev/null @@ -1,539 +0,0 @@ -// MIT License -// -// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include "api.hpp" -#include "core/common.hpp" -#include "core/config.hpp" -#include "core/debug.hpp" -#include "core/defines.hpp" - -#include - -#if defined(ROCPROFSYS_USE_OMPT) && ROCPROFSYS_USE_OMPT > 0 - -# include "binary/link_map.hpp" -# include "core/components/fwd.hpp" -# include "library/components/category_region.hpp" -# include "library/tracing.hpp" - -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include - -# include -# include -# include -# include - -using api_t = tim::project::rocprofsys; - -namespace rocprofsys -{ -namespace component -{ -struct ompt : comp::base -{ - using value_type = void; - using base_type = comp::base; - using context_info_t = tim::openmp::context_info; - - static std::string label() { return "ompt"; } - static std::string description() { return "OpenMP tools tracing"; } - - ompt() = default; - ~ompt() = default; - ompt(const ompt&) = default; - ompt(ompt&&) noexcept = default; - - ompt& operator=(const ompt&) = default; - ompt& operator=(ompt&&) noexcept = default; - - template - void start(const context_info_t& _ctx_info, Args&&...) const - { - category_region::start(m_prefix); - - auto _ts = tracing::now(); - uint64_t _cid = - (_ctx_info.target_arguments) ? _ctx_info.target_arguments->host_op_id : 0; - auto _annotate = [&](::perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation(ctx, "begin_ns", _ts); - for(const auto& itr : _ctx_info.arguments) - tracing::add_perfetto_annotation(ctx, itr.label, itr.value); - } - }; - - if(_cid > 0) - { - category_region::start( - (_ctx_info.func.empty()) ? m_prefix : _ctx_info.func, _ts, - ::perfetto::Flow::ProcessScoped(_cid), std::move(_annotate)); - } - else - { - category_region::start( - (_ctx_info.func.empty()) ? m_prefix : _ctx_info.func, _ts, - std::move(_annotate)); - } - } - - template - void stop(const context_info_t& _ctx_info, Args&&...) const - { - category_region::stop(m_prefix); - - auto _ts = tracing::now(); - uint64_t _cid = - (_ctx_info.target_arguments) ? _ctx_info.target_arguments->host_op_id : 0; - auto _annotate = [&](::perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - tracing::add_perfetto_annotation(ctx, "end_ns", _ts); - for(const auto& itr : _ctx_info.arguments) - tracing::add_perfetto_annotation(ctx, itr.label, itr.value); - } - }; - - if(_cid > 0) - { - category_region::stop( - (_ctx_info.func.empty()) ? m_prefix : _ctx_info.func, _ts, - std::move(_annotate)); - } - else - { - category_region::stop( - (_ctx_info.func.empty()) ? m_prefix : _ctx_info.func, _ts, - std::move(_annotate)); - } - } - - template - void store(const context_info_t& _ctx_info, Args&&... _args) const - { - start(_ctx_info, std::forward(_args)...); - stop(_ctx_info, std::forward(_args)...); - } - - static void record(std::string_view name, ompt_id_t id, uint64_t beg_time, - uint64_t end_time, uint64_t thrd_id, uint64_t targ_id, - const context_info_t& common) - { - (void) thrd_id; - (void) targ_id; - - auto _annotate = [&](::perfetto::EventContext ctx) { - if(config::get_perfetto_annotations()) - { - for(const auto& itr : common.arguments) - tracing::add_perfetto_annotation(ctx, itr.label, itr.value); - } - }; - - auto _track = tracing::get_perfetto_track( - category::ompt{}, - [](auto) -> std::string { return "OpenMP Target Offloads"; }, 0); - - category_region::start( - name, _track, beg_time, ::perfetto::Flow::ProcessScoped(id), - std::move(_annotate)); - - category_region::stop(name, _track, - end_time); - } - - void set_prefix(std::string_view _v) { m_prefix = _v; } - -private: - std::string_view m_prefix = {}; -}; -} // namespace component -} // namespace rocprofsys - -namespace tim -{ -namespace trait -{ -template <> -struct ompt_handle -{ - using type = component_tuple<::rocprofsys::component::ompt>; -}; -} // namespace trait -} // namespace tim - -namespace rocprofsys -{ -namespace ompt -{ -namespace -{ -using ompt_handle_t = tim::component::ompt_handle; -using ompt_context_t = tim::openmp::context_handler; -using ompt_toolset_t = typename ompt_handle_t::toolset_type; -using ompt_bundle_t = tim::component_tuple; - -std::unique_ptr f_bundle = {}; -bool _init_toolset_off = (trait::runtime_enabled::set(false), - trait::runtime_enabled::set(false), true); -tim::ompt::finalize_tool_func_t f_finalize = nullptr; -} // namespace - -void -setup() -{ - if(!tim::settings::enabled()) return; - trait::runtime_enabled::set(true); - trait::runtime_enabled::set(true); - tim::auto_lock_t lk{ tim::type_mutex() }; - f_bundle = std::make_unique("rocprofsys/ompt", - quirk::config{}); -} - -void -shutdown() -{ - static bool _protect = false; - if(_protect) return; - _protect = true; - if(f_bundle) - { - if(tim::manager::instance()) tim::manager::instance()->cleanup("rocprofsys-ompt"); - f_bundle->stop(); - ompt_context_t::cleanup(); - trait::runtime_enabled::set(false); - trait::runtime_enabled::set(false); - pthread_gotcha::shutdown(); - // call the OMPT finalize callback - if(f_finalize) - { - for(const auto& itr : tim::openmp::get_ompt_device_functions()) - if(itr.second.stop_trace) itr.second.stop_trace(itr.second.device); - (*f_finalize)(); - f_finalize = nullptr; - } - } - f_bundle.reset(); - _protect = false; -} - -namespace -{ -bool& -use_tool() -{ - static bool _v = false; - return _v; -} - -int -tool_initialize(ompt_function_lookup_t lookup, int initial_device_num, - ompt_data_t* tool_data) -{ - if(!rocprofsys::settings_are_configured()) - { - ROCPROFSYS_BASIC_WARNING_F( - 0, - "[%s] invoked before rocprof-sys was initialized. In instrumentation mode, " - "settings exported to the environment have not been propagated yet...\n", - __FUNCTION__); - use_tool() = get_env("ROCPROFSYS_USE_OMPT", true, false); - } - else - { - use_tool() = rocprofsys::config::get_use_ompt(); - } - - if(use_tool()) - { - ROCPROFSYS_BASIC_VERBOSE_F( - 2, "OpenMP-tools configuring for initial device %i\n\n", initial_device_num); - - static auto _generate_key = [](std::string_view _key_v, - const ::tim::openmp::argument_array_t& _args_v) { - return std::string{ _key_v }; - (void) _args_v; - }; - - tim::openmp::get_codeptr_ra_resolver() = - [](tim::openmp::context_info& _ctx_info) { - const auto& _key = _ctx_info.label; - const auto* codeptr_ra = _ctx_info.codeptr_ra; - auto& _args = _ctx_info.arguments; - - ROCPROFSYS_BASIC_VERBOSE(2, "resolving codeptr return address for %s\n", - _key.data()); - - if(!codeptr_ra) return _generate_key(_key, _args); - - static thread_local auto _once = std::once_flag{}; - std::call_once(_once, []() { ::tim::unwind::update_file_maps(); }); - - auto _info = ::rocprofsys::binary::lookup_ipaddr_entry( - reinterpret_cast(codeptr_ra)); - - if(_info) - { - _ctx_info.func = tim::demangle(_info->name); - if(_info->lineno > 0) - { - auto _linfo = _info->lineinfo.rget([](const auto& _v) -> bool { - return (_v && !_v.location.empty() && _v.line > 0); - }); - - if(_linfo) - { - _ctx_info.file = _linfo.location; - _ctx_info.line = _linfo.line; - _args.emplace_back("file", _ctx_info.file); - _args.emplace_back("lineinfo", - ::timemory::join::join("@", _ctx_info.file, - _ctx_info.line)); - } - else - { - _ctx_info.file = _info->location; - _args.emplace_back("file", _ctx_info.file); - } - - return _generate_key( - ::timemory::join::join(" @ ", _key, _ctx_info.func), _args); - } - else - { - return _generate_key( - ::timemory::join::join(" @ ", _key, _ctx_info.func), _args); - } - } - else - { - auto _dl_info = Dl_info{ nullptr, nullptr, nullptr, nullptr }; - if(dladdr(codeptr_ra, &_dl_info) != 0) - { - _ctx_info.file = _dl_info.dli_fname; - _ctx_info.func = tim::demangle(_dl_info.dli_sname); - _args.emplace_back("file", _ctx_info.file); - return _generate_key( - ::timemory::join::join( - " @ ", _key, - ::timemory::join::join("", _ctx_info.func, " [", - _ctx_info.file, "]")), - _args); - } - } - - // since no line info could be deduced, include the codeptr return address - auto _args_codeptr_v = _args; - _args_codeptr_v.emplace_back("codeptr_ra", codeptr_ra); - return _generate_key(_key, _args_codeptr_v); - }; - - tim::openmp::get_function_lookup_callback< - api_t>() = [](ompt_function_lookup_t, - const std::optional& - params) { - if(!params) return; - - ROCPROFSYS_VERBOSE(3, "[ompt] configuring device %i...\n", - params->device_num); - - auto& device_funcs = - tim::openmp::get_ompt_device_functions().at(params->device_num); - - device_funcs.set_trace_ompt(params->device, 1, ompt_callback_target_data_op); - device_funcs.set_trace_ompt(params->device, 1, ompt_callback_target_submit); - - static ompt_callback_buffer_request_t request = - [](int device_num, ompt_buffer_t** buffer, size_t* bytes) { - ROCPROFSYS_VERBOSE(3, "[ompt] buffer request...\n"); - *bytes = ::tim::units::get_page_size(); - *buffer = mmap(nullptr, *bytes, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - (void) device_num; - }; - - static ompt_callback_buffer_complete_t complete = [](int device_num, - ompt_buffer_t* buffer, - size_t bytes, - ompt_buffer_cursor_t - begin, - int buffer_owned) { - ROCPROFSYS_VERBOSE(3, "[ompt] buffer complete...\n"); - tim::consume_parameters(device_num, buffer, bytes, begin, buffer_owned); - - auto _funcs = - tim::openmp::get_ompt_device_functions().at(device_num); - auto _skew = rocprofsys::tracing::get_clock_skew( - [&_funcs]() { return _funcs.get_device_time(_funcs.device); }); - - ompt_buffer_cursor_t _cursor = begin; - size_t _nrecords = 0; - do - { - if(_cursor == 0) break; - ++_nrecords; - auto* _record = _funcs.get_record_ompt(buffer, _cursor); - if(_record) - { - const char* _type = tim::openmp::get_enum_label(_record->type); - auto _thrd_id = _record->thread_id; - auto _targ_id = _record->target_id; - - unsigned long beg_time = _record->time + _skew; - unsigned long end_time = 0; - ompt_id_t id = 0; - const char* _name = tim::openmp::get_enum_label(_record->type); - - if(_record->type == ompt_callback_target_submit) - { - auto& _data = _record->record.target_kernel; - end_time = _data.end_time + _skew; - id = _data.host_op_id; - - auto _ctx_info = tim::openmp::argument_array_t{ - { "begin_ns", beg_time }, - { "end_ns", end_time }, - { "type", _type }, - { "thread_id", _thrd_id }, - { "target_id", _targ_id }, - { "host_op_id", id }, - { "requested_num_teams", _data.requested_num_teams }, - { "granted_num_teams", _data.granted_num_teams } - }; - - component::ompt::record( - _name, id, beg_time, end_time, _thrd_id, _targ_id, - tim::openmp::context_info{ _name, nullptr, _ctx_info }); - } - else if(_record->type == ompt_callback_target_data_op) - { - auto& _data = _record->record.target_data_op; - end_time = _data.end_time + _skew; - id = _data.host_op_id; - const auto* _opname = - tim::openmp::get_enum_label(_data.optype); - - auto _ctx_info = tim::openmp::argument_array_t{ - { "begin_ns", beg_time }, - { "end_ns", end_time }, - { "type", _type }, - { "thread_id", _thrd_id }, - { "target_id", _targ_id }, - { "host_op_id", id }, - { "optype", _opname }, - { "src_addr", reinterpret_cast(_data.src_addr) }, - { "dst_addr", reinterpret_cast(_data.dest_addr) }, - { "src_device_num", _data.src_device_num }, - { "dst_device_num", _data.dest_device_num }, - { "bytes", _data.bytes }, - }; - - component::ompt::record( - _opname, id, beg_time, end_time, _thrd_id, _targ_id, - tim::openmp::context_info{ _name, nullptr, _ctx_info }); - } - - ROCPROFSYS_VERBOSE( - 3, - "type=%i, type_name=%s, start=%lu, end=%lu, delta=%lu, " - "tid=%lu, target_id=%lu, host_id=%lu\n", - _record->type, tim::openmp::get_enum_label(_record->type), - beg_time, end_time, (end_time - beg_time), _record->thread_id, - _record->target_id, id); - } - - _funcs.advance_buffer_cursor(_funcs.device, buffer, bytes, _cursor, - &_cursor); - } while(_cursor != 0); - - ROCPROFSYS_VERBOSE(3, "[ompt] number of records: %zu\n", _nrecords); - - if(buffer_owned == 1) - { - ::munmap(buffer, bytes); - } - }; - - device_funcs.start_trace(params->device, request, complete); - }; - - f_finalize = tim::ompt::configure(lookup, initial_device_num, tool_data); - } - return 1; // success -} - -void -tool_finalize(ompt_data_t*) -{ - shutdown(); -} -} // namespace -} // namespace ompt -} // namespace rocprofsys - -extern "C" -{ - ompt_start_tool_result_t* ompt_start_tool(unsigned int, - const char*) ROCPROFSYS_PUBLIC_API; - - ompt_start_tool_result_t* ompt_start_tool(unsigned int omp_version, - const char* runtime_version) - { - ROCPROFSYS_BASIC_VERBOSE_F(0, "OpenMP version: %u, runtime version: %s\n", - omp_version, runtime_version); - ROCPROFSYS_METADATA("OMP_VERSION", omp_version); - ROCPROFSYS_METADATA("OMP_RUNTIME_VERSION", runtime_version); - static auto* data = new ompt_start_tool_result_t{ - &rocprofsys::ompt::tool_initialize, &rocprofsys::ompt::tool_finalize, { 0 } - }; - return data; - } -} - -#else -namespace rocprofsys -{ -namespace ompt -{ -void -setup() -{} - -void -shutdown() -{} -} // namespace ompt -} // namespace rocprofsys - -#endif diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/ompt.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/ompt.hpp deleted file mode 100644 index c32f78f751..0000000000 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/ompt.hpp +++ /dev/null @@ -1,35 +0,0 @@ -// MIT License -// -// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#pragma once - -namespace rocprofsys -{ -namespace ompt -{ -void -setup(); - -void -shutdown(); -} // namespace ompt -} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp index 6cb082d0fa..59cf4e72f1 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp @@ -903,14 +903,189 @@ get_kernel_dispatch_timestamps() return _v; } +#if(ROCPROFILER_VERSION >= 600) +// To handle events without finalization, perfetto push must occur in start +// Allows capture of worker thread implicit and sync tasks +void +ompt_tracing_callback_start(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* /*user_data*/, + rocprofiler_timestamp_t ts) +{ + static bool is_first_implicit_call = true; + + // Ignore first ompt_implicit_call as this is created after runtime initialization but + // before first region + // Respective end is also not received due to finalization occurring too late + if(is_first_implicit_call && (record.kind == ROCPROFILER_CALLBACK_TRACING_OMPT && + record.operation == ROCPROFILER_OMPT_ID_implicit_task)) + { + is_first_implicit_call = false; + return; + } + + std::string_view _name = + tool_data->callback_tracing_info.at(record.kind, record.operation); + + // Forces omp_parallel begin and end to have same name, allowing perfetto track to + // connect. This will be changed in the future + if(record.operation == ROCPROFILER_OMPT_ID_parallel_begin) _name = "omp_parallel"; + // Although not necessary to connect them, this forces a unified name instead of + // the whole track being named omp_lock_init + if(record.operation == ROCPROFILER_OMPT_ID_lock_init) _name = "omp_lock"; + + if(get_use_timemory()) + { + component::category_region::start( + _name); + } + + if(get_use_perfetto()) + { + auto args = callback_arg_array_t{}; + if(config::get_perfetto_annotations()) + { + rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, 1, + &args); + } + + uint64_t _beg_ts = ts; + auto stream_id = stream_id_top(); + + tracing::push_perfetto_ts( + category::rocm_ompt_api{}, _name.data(), _beg_ts, + ::perfetto::Flow::ProcessScoped(record.correlation_id.internal), + [&](::perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + { + tracing::add_perfetto_annotation(ctx, "begin_ns", _beg_ts); + tracing::add_perfetto_annotation(ctx, "corr_id", + record.correlation_id.internal); + if(stream_id.handle != 0) + tracing::add_perfetto_annotation(ctx, "stream_id", + stream_id.handle); + for(const auto& [key, val] : args) + { + tracing::add_perfetto_annotation(ctx, key, val); + } + } + }); + } +} + +void +ompt_tracing_callback_stop( + rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t* /*user_data*/, + rocprofiler_timestamp_t ts, + std::optional>& _bt_data) +{ + std::string_view _name = + tool_data->callback_tracing_info.at(record.kind, record.operation); + + // Forces omp_parallel begin and end to have same name, allowing perfetto track to + // connect. This will be changed in the future + if(record.operation == ROCPROFILER_OMPT_ID_parallel_end) _name = "omp_parallel"; + // Although not necessary to connect them, this forces a unified name instead of + // the whole track being named omp_lock_init + if(record.operation == ROCPROFILER_OMPT_ID_lock_destroy) _name = "omp_lock"; + + if(get_use_timemory()) + { + component::category_region::stop( + _name); + } + + if(get_use_perfetto()) + { + auto args = callback_arg_array_t{}; + if(config::get_perfetto_annotations()) + { + rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, 2, + &args); + } + uint64_t _end_ts = ts; + tracing::pop_perfetto_ts( + category::rocm_ompt_api{}, _name.data(), _end_ts, + [&](::perfetto::EventContext ctx) { + if(config::get_perfetto_annotations()) + tracing::add_perfetto_annotation(ctx, "end_ns", _end_ts); + if(_bt_data && !_bt_data->empty()) + { + const std::string _unk = "??"; + size_t _bt_cnt = 0; + for(const auto& itr : *_bt_data) + { + auto _linfo = itr.lineinfo.get(); + const auto* _func = (itr.name.empty()) ? &_unk : &itr.name; + const auto* _loc = + (_linfo && !_linfo.location.empty()) + ? &_linfo.location + : ((itr.location.empty()) ? &_unk : &itr.location); + auto _line = (_linfo && _linfo.line > 0) + ? join("", _linfo.line) + : ((itr.lineno == 0) ? std::string{ "?" } + : join("", itr.lineno)); + auto _entry = join("", demangle(*_func), " @ ", + join(':', ::basename(_loc->c_str()), _line)); + if(_bt_cnt < 10) + { + // Prepend zero for better ordering in UI. Only one zero + // is ever necessary since stack depth is limited to 16. + tracing::add_perfetto_annotation( + ctx, join("", "frame#0", _bt_cnt++), _entry); + } + else + { + tracing::add_perfetto_annotation( + ctx, join("", "frame#", _bt_cnt++), _entry); + } + } + } + }); + } +} + +#endif + void tool_tracing_callback(rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t* user_data, void* /*callback_data*/) { + using backtrace_entry_vec_t = std::vector; + auto _bt_data = std::optional{}; + auto populate_backtrace_data = [&]() { + constexpr size_t backtrace_stack_depth = 16; + constexpr size_t backtrace_ignore_depth = 3; + constexpr bool backtrace_with_signal_frame = true; + auto use_perfetto = + (config::get_use_perfetto() && config::get_perfetto_annotations()); + auto use_rocpd = config::get_use_rocpd(); + + if((use_perfetto || use_rocpd) && + tool_data->backtrace_operations.at(record.kind).count(record.operation) > 0) + { + auto _backtrace = + tim::get_unw_stack(); + _bt_data = backtrace_entry_vec_t{}; + _bt_data->reserve(_backtrace.size()); + for(auto itr : _backtrace) + { + if(itr) + { + if(auto _val = binary::lookup_ipaddr_entry(itr->address()); + _val) + { + _bt_data->emplace_back(std::move(*_val)); + } + } + } + } + }; + auto ts = rocprofiler_timestamp_t{}; ROCPROFILER_CALL(rocprofiler_get_timestamp(&ts)); + const char* name = ""; - const char* name = nullptr; rocprofiler_query_callback_tracing_kind_operation_name(record.kind, record.operation, &name, nullptr); @@ -921,6 +1096,13 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, << ", phase=" << record.phase << ", dt_nsec=" << std::setw(8) << ts << ", name=" << name; + if(rocprofsys::get_state() != rocprofsys::State::Active) + { + ROCPROFSYS_WARNING_F(0, "Callback called when tool is not active.\n\t%s\n", + info.str().c_str()); + return; + } + if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) { user_data->value = ts; @@ -949,6 +1131,11 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, break; } #if(ROCPROFILER_VERSION >= 600) + case ROCPROFILER_CALLBACK_TRACING_OMPT: + { + ompt_tracing_callback_start(record, user_data, ts); + break; + } case ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API: { tool_tracing_callback_start(category::rocm_rocdecode_api{}, record, @@ -979,7 +1166,7 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, case ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH: case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY: #if(ROCPROFILER_VERSION >= 600) - case ROCPROFILER_CALLBACK_TRACING_OMPT: + case ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION: case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION: #endif #if(ROCPROFILER_VERSION >= 700) @@ -1000,36 +1187,7 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, } else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) { - using backtrace_entry_vec_t = std::vector; - - constexpr size_t bt_stack_depth = 16; - constexpr size_t bt_ignore_depth = 3; - constexpr bool bt_with_signal_frame = true; - - auto _bt_data = std::optional{}; - auto use_perfetto = - (config::get_use_perfetto() && config::get_perfetto_annotations()); - auto use_rocpd = config::get_use_rocpd(); - - if((use_perfetto || use_rocpd) && - tool_data->backtrace_operations.at(record.kind).count(record.operation) > 0) - { - auto _backtrace = tim::get_unw_stack(); - _bt_data = backtrace_entry_vec_t{}; - _bt_data->reserve(_backtrace.size()); - for(auto itr : _backtrace) - { - if(itr) - { - if(auto _val = binary::lookup_ipaddr_entry(itr->address()); - _val) - { - _bt_data->emplace_back(std::move(*_val)); - } - } - } - } + populate_backtrace_data(); switch(record.kind) { @@ -1056,6 +1214,11 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, break; } #if(ROCPROFILER_VERSION >= 600) + case ROCPROFILER_CALLBACK_TRACING_OMPT: + { + ompt_tracing_callback_stop(record, user_data, ts, _bt_data); + break; + } case ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API: { tool_tracing_callback_stop(category::rocm_rocdecode_api{}, record, @@ -1087,7 +1250,7 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, case ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH: case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY: #if(ROCPROFILER_VERSION >= 600) - case ROCPROFILER_CALLBACK_TRACING_OMPT: + case ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION: case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION: #endif #if(ROCPROFILER_VERSION >= 700) @@ -1125,6 +1288,77 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, } } break; +#if(ROCPROFILER_VERSION >= 600) + case ROCPROFILER_CALLBACK_TRACING_OMPT: + { + // Callbacks that are received but that we do not process + static const std::set ompt_no_process = { + ROCPROFILER_OMPT_ID_callback_functions, // "Fake" callback + // There is no point in handling ompt_thread_begin events as the + // corresponding ompt_thread_end event will not occur unless + // runtime is finalized earlier + ROCPROFILER_OMPT_ID_thread_begin, + ROCPROFILER_OMPT_ID_thread_end, + }; + + auto ompt_operation_type = + static_cast(record.operation); + if(ompt_no_process.find(ompt_operation_type) != ompt_no_process.end()) + return; + + populate_backtrace_data(); + + switch(ompt_operation_type) + { + case ROCPROFILER_OMPT_ID_parallel_begin: + ompt_tracing_callback_start(record, user_data, ts); + break; + case ROCPROFILER_OMPT_ID_parallel_end: + ompt_tracing_callback_stop(record, user_data, ts, _bt_data); + break; + case ROCPROFILER_OMPT_ID_lock_init: + ompt_tracing_callback_start(record, user_data, ts); + break; + case ROCPROFILER_OMPT_ID_lock_destroy: + ompt_tracing_callback_stop(record, user_data, ts, _bt_data); + break; + // Although this has endpoint arg, treat it as instant event + case ROCPROFILER_OMPT_ID_nest_lock: + case ROCPROFILER_OMPT_ID_dispatch: + case ROCPROFILER_OMPT_ID_flush: + case ROCPROFILER_OMPT_ID_cancel: + case ROCPROFILER_OMPT_ID_device_initialize: + case ROCPROFILER_OMPT_ID_device_finalize: + case ROCPROFILER_OMPT_ID_device_load: + // case ROCPROFILER_OMPT_ID_device_unload: // Unsupported by runtime + case ROCPROFILER_OMPT_ID_task_create: + case ROCPROFILER_OMPT_ID_task_schedule: + case ROCPROFILER_OMPT_ID_mutex_released: + case ROCPROFILER_OMPT_ID_mutex_acquire: + case ROCPROFILER_OMPT_ID_mutex_acquired: + case ROCPROFILER_OMPT_ID_dependences: + case ROCPROFILER_OMPT_ID_task_dependence: + case ROCPROFILER_OMPT_ID_error: + { + // These callbacks are considered instant events and should start + // and immediately call stop as no corresponding "end" will be + // received + ompt_tracing_callback_start(record, user_data, ts); + ROCPROFILER_CALL( + rocprofiler_get_timestamp(&ts)); // Set artificial end ts + ompt_tracing_callback_stop(record, user_data, ts, _bt_data); + break; + } + default: + ROCPROFSYS_WARNING_F( + 1, + "tool_tracing_callback: unhandled PHASE_NONE " + "callback record\n\t%s\n", + info.str().c_str()); + } + } + break; +#endif default: { ROCPROFSYS_WARNING_F(1, @@ -1138,6 +1372,9 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, else { ROCPROFSYS_CI_ABORT(true, "unhandled callback record phase: %i\n", record.phase); + ROCPROFSYS_WARNING_F(1, + "tool_tracing_callback: unhandled callback record\n\t%s\n", + info.str().c_str()); } } @@ -1706,6 +1943,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data) ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API, ROCPROFILER_CALLBACK_TRACING_RCCL_API, #if(ROCPROFILER_VERSION >= 600) + ROCPROFILER_CALLBACK_TRACING_OMPT, ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API, #endif #if(ROCPROFILER_VERSION >= 700) diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake index ef7d9c69a9..2391d1ac81 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake @@ -51,7 +51,7 @@ if(NOT EXISTS "${_rocm_llvm_lib}/libomptarget.so" AND ROCPROFSYS_USE_ROCM) endif() if(ROCPROFSYS_OPENMP_USING_LIBOMP_LIBRARY AND ROCPROFSYS_USE_OMPT) - set(_OMPT_PASS_REGEX "\\|_ompt_") + set(_OMPT_PASS_REGEX "\\|_omp_") else() set(_OMPT_PASS_REGEX "") endif()