Use rocprofiler-SDK for OMPT tracing (#702)
Switch to using SDK for OMPT tracing and remove older OMPT code path
Šī revīzija ir iekļauta:
revīziju iesūtīja
GitHub
vecāks
5f4e0dc889
revīzija
07a7b9b845
@@ -474,7 +474,7 @@ jobs:
|
||||
-DROCPROFSYS_USE_ROCM=OFF
|
||||
-DROCPROFSYS_USE_RCCL=OFF
|
||||
-DROCPROFSYS_MAX_THREADS=64
|
||||
-DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode;jpegdecode;openmp-target"
|
||||
-DROCPROFSYS_DISABLE_EXAMPLES="transpose;rccl;videodecode;jpegdecode;openmp"
|
||||
-DROCPROFSYS_BUILD_NUMBER=${{ github.run_attempt }}
|
||||
--
|
||||
-LE "transpose|rccl|videodecode|jpegdecode|network"
|
||||
|
||||
@@ -630,12 +630,7 @@ set(TIMEMORY_QUIET_CONFIG ON CACHE BOOL "Make timemory configuration quieter")
|
||||
# timemory feature settings
|
||||
set(TIMEMORY_USE_GOTCHA ON CACHE BOOL "Enable GOTCHA support in timemory")
|
||||
set(TIMEMORY_USE_PERFETTO OFF CACHE BOOL "Disable perfetto support in timemory")
|
||||
set(TIMEMORY_USE_OMPT
|
||||
${ROCPROFSYS_USE_OMPT}
|
||||
CACHE BOOL
|
||||
"Enable OMPT support in timemory"
|
||||
FORCE
|
||||
)
|
||||
set(TIMEMORY_USE_OMPT OFF CACHE BOOL "Enable OMPT support in timemory" FORCE)
|
||||
set(TIMEMORY_USE_PAPI
|
||||
${ROCPROFSYS_USE_PAPI}
|
||||
CACHE BOOL
|
||||
|
||||
@@ -140,6 +140,12 @@ run_impl()
|
||||
int
|
||||
run()
|
||||
{
|
||||
#if _OPENMP == 202011
|
||||
std::cout << "Compiler OpenMP version == 202011" << std::endl;
|
||||
#elif _OPENMP == 201811
|
||||
std::cout << "Compiler OpenMP version == 201811" << std::endl;
|
||||
#endif
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
run_impl();
|
||||
|
||||
@@ -134,10 +134,6 @@ get_initial_environment()
|
||||
|
||||
update_env(_env, "ROCPROFSYS_USE_SAMPLING", (_mode != "causal"));
|
||||
|
||||
#if defined(ROCPROFSYS_USE_OMPT)
|
||||
if(!getenv("OMP_TOOL_LIBRARIES"))
|
||||
update_env(_env, "OMP_TOOL_LIBRARIES", _dl_libpath, UPD_APPEND);
|
||||
#endif
|
||||
return _env;
|
||||
}
|
||||
|
||||
@@ -769,9 +765,6 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
|
||||
_update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0);
|
||||
|
||||
if(_v.count("all") > 0 || _v.count("ompt") > 0)
|
||||
update_env(_env, "OMP_TOOL_LIBRARIES", _dl_libpath, UPD_APPEND);
|
||||
|
||||
if(_v.count("all") > 0 || _v.count("kokkosp") > 0)
|
||||
update_env(_env, "KOKKOS_TOOLS_LIBS", _omni_libpath, UPD_APPEND);
|
||||
});
|
||||
@@ -793,15 +786,6 @@ parse_args(int argc, char** argv, std::vector<char*>& _env)
|
||||
_update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0);
|
||||
|
||||
// if(_v.count("all") > 0 || _v.count("rocprofiler") > 0)
|
||||
// {
|
||||
// remove_env(_env, "ROCP_TOOL_LIB");
|
||||
// remove_env(_env, "ROCP_HSA_INTERCEPT");
|
||||
// }
|
||||
|
||||
if(_v.count("all") > 0 || _v.count("ompt") > 0)
|
||||
remove_env(_env, "OMP_TOOL_LIBRARIES");
|
||||
|
||||
if(_v.count("all") > 0 || _v.count("kokkosp") > 0)
|
||||
remove_env(_env, "KOKKOS_TOOLS_LIBS");
|
||||
});
|
||||
|
||||
@@ -109,25 +109,6 @@ get_environ(int _verbose, std::string _search_paths = {},
|
||||
_omnilib = common::path::find_path(_omnilib, _verbose, _search_paths);
|
||||
_omnilib_dl = common::path::find_path(_omnilib_dl, _verbose, _search_paths);
|
||||
|
||||
#if defined(ROCPROFSYS_USE_OMPT) && ROCPROFSYS_USE_OMPT > 0
|
||||
if(get_env("ROCPROFSYS_USE_OMPT", true))
|
||||
{
|
||||
std::string _omni_omp_libs = _omnilib_dl;
|
||||
const char* _omp_libs = getenv("OMP_TOOL_LIBRARIES");
|
||||
int _override = 0;
|
||||
if(_omp_libs != nullptr &&
|
||||
std::string_view{ _omp_libs }.find(_omnilib_dl) == std::string::npos)
|
||||
{
|
||||
_override = 1;
|
||||
_omni_omp_libs = common::join(':', _omp_libs, _omnilib_dl);
|
||||
}
|
||||
ROCPROFSYS_SETUP_LOG(_verbose >= 2, "setting OMP_TOOL_LIBRARIES to '%s'\n",
|
||||
_omni_omp_libs.c_str());
|
||||
_data.emplace_back(
|
||||
env_config{ "OMP_TOOL_LIBRARIES", _omni_omp_libs.c_str(), _override });
|
||||
}
|
||||
#endif
|
||||
|
||||
return _data;
|
||||
}
|
||||
|
||||
|
||||
@@ -239,11 +239,6 @@ init_parser(parser_data& _data)
|
||||
auto _libexecpath = get_realpath(get_internal_script_path());
|
||||
update_env(_data, "ROCPROFSYS_SCRIPT_PATH", _libexecpath, UPD_REPLACE);
|
||||
|
||||
#if defined(ROCPROFSYS_USE_OMPT)
|
||||
if(!getenv("OMP_TOOL_LIBRARIES"))
|
||||
update_env(_data, "OMP_TOOL_LIBRARIES", _data.dl_libpath, UPD_PREPEND);
|
||||
#endif
|
||||
|
||||
return _data;
|
||||
}
|
||||
|
||||
@@ -619,10 +614,6 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
|
||||
_update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0);
|
||||
|
||||
if(_v.count("all") > 0 || _v.count("ompt") > 0)
|
||||
update_env(_data, "OMP_TOOL_LIBRARIES", _data.dl_libpath,
|
||||
UPD_PREPEND);
|
||||
|
||||
if(_v.count("all") > 0 || _v.count("kokkosp") > 0)
|
||||
update_env(_data, "KOKKOS_TOOLS_LIBS", _data.omni_libpath,
|
||||
UPD_PREPEND);
|
||||
@@ -653,9 +644,6 @@ add_core_arguments(parser_t& _parser, parser_data& _data)
|
||||
_update("ROCPROFSYS_TRACE_THREAD_RW_LOCKS", _v.count("rw-locks") > 0);
|
||||
_update("ROCPROFSYS_TRACE_THREAD_SPIN_LOCKS", _v.count("spin-locks") > 0);
|
||||
|
||||
if(_v.count("all") > 0 || _v.count("ompt") > 0)
|
||||
remove_env(_data, "OMP_TOOL_LIBRARIES");
|
||||
|
||||
if(_v.count("all") > 0 || _v.count("kokkosp") > 0)
|
||||
remove_env(_data, "KOKKOS_TOOLS_LIBS");
|
||||
});
|
||||
|
||||
@@ -105,6 +105,7 @@ ROCPROFSYS_DEFINE_CATEGORY(category, rocm_marker_api, ROCPROFSYS_CATEGORY_ROCM_M
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rocdecode_api, ROCPROFSYS_CATEGORY_ROCM_ROCDECODE_API, "rocm_rocdecode_api", "ROCm RocDecode API")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rocjpeg_api, ROCPROFSYS_CATEGORY_ROCM_ROCJPEG_API, "rocm_rocjpeg_api", "ROCm RocJPEG API")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_rccl_api, ROCPROFSYS_CATEGORY_ROCM_RCCL_API, "rocm_rccl_api", "ROCm RCCL API")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_ompt_api, ROCPROFSYS_CATEGORY_ROCM_OMPT_API, "rocm_ompt_api", "ROCm OMPT API")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi, ROCPROFSYS_CATEGORY_AMD_SMI, "amd_smi", "AMD-SMI data")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_gfx_busy, ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_GFX, "device_busy_gfx", "Busy percentage of GFX engine on a GPU device")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, amd_smi_umc_busy, ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_UMC, "device_busy_umc", "Busy percentage of UMC engin on a GPU device")
|
||||
@@ -177,6 +178,7 @@ using name = perfetto_category<Tp...>;
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_rocdecode_api), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_rocjpeg_api), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_rccl_api), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_ompt_api), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_gfx_busy), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::amd_smi_umc_busy), \
|
||||
|
||||
@@ -1930,12 +1930,8 @@ get_use_vaapi_tracing()
|
||||
bool
|
||||
get_use_ompt()
|
||||
{
|
||||
#if defined(TIMEMORY_USE_OMPT)
|
||||
static auto _v = get_config()->find("ROCPROFSYS_USE_OMPT");
|
||||
return static_cast<tim::tsettings<bool>&>(*_v->second).get();
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool
|
||||
|
||||
@@ -414,6 +414,7 @@ get_callback_domains()
|
||||
{
|
||||
// Argument tracing is supported in rocprofiler-sdk 0.6.0 and later
|
||||
supported.emplace(ROCPROFILER_CALLBACK_TRACING_RCCL_API);
|
||||
supported.emplace(ROCPROFILER_CALLBACK_TRACING_OMPT);
|
||||
supported.emplace(ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API);
|
||||
}
|
||||
# endif
|
||||
@@ -436,6 +437,15 @@ get_callback_domains()
|
||||
_data.emplace(ROCPROFILER_CALLBACK_TRACING_RCCL_API);
|
||||
}
|
||||
|
||||
# if ROCPROFILER_VERSION >= 600
|
||||
if(config::get_use_ompt() && _version.formatted >= 600)
|
||||
{
|
||||
// Translate some configuration settings to rocprofiler domains
|
||||
_data.emplace(ROCPROFILER_CALLBACK_TRACING_OMPT);
|
||||
}
|
||||
# endif
|
||||
|
||||
// Check that the domains are valid
|
||||
const auto valid_choices =
|
||||
settings::instance()->at("ROCPROFSYS_ROCM_DOMAINS")->get_choices();
|
||||
|
||||
|
||||
@@ -57,6 +57,7 @@ extern "C"
|
||||
ROCPROFSYS_CATEGORY_ROCM_ROCDECODE_API,
|
||||
ROCPROFSYS_CATEGORY_ROCM_ROCJPEG_API,
|
||||
ROCPROFSYS_CATEGORY_ROCM_RCCL_API,
|
||||
ROCPROFSYS_CATEGORY_ROCM_OMPT_API,
|
||||
ROCPROFSYS_CATEGORY_AMD_SMI,
|
||||
ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_GFX,
|
||||
ROCPROFSYS_CATEGORY_AMD_SMI_BUSY_UMC,
|
||||
|
||||
@@ -56,7 +56,6 @@
|
||||
#include "library/components/pthread_gotcha.hpp"
|
||||
#include "library/components/vaapi_gotcha.hpp"
|
||||
#include "library/coverage.hpp"
|
||||
#include "library/ompt.hpp"
|
||||
#include "library/process_sampler.hpp"
|
||||
#include "library/ptl.hpp"
|
||||
#include "library/rocprofiler-sdk.hpp"
|
||||
@@ -85,6 +84,7 @@
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
# include <rocprofiler-sdk/agent.h>
|
||||
# include <rocprofiler-sdk/registration.h>
|
||||
#endif
|
||||
|
||||
#include <atomic>
|
||||
@@ -602,12 +602,6 @@ rocprofsys_init_tooling_hidden(void)
|
||||
}
|
||||
}
|
||||
|
||||
if(get_use_ompt())
|
||||
{
|
||||
ROCPROFSYS_VERBOSE_F(1, "Setting up OMPT...\n");
|
||||
ompt::setup();
|
||||
}
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
ROCPROFSYS_VERBOSE_F(1, "Starting Perfetto...\n");
|
||||
@@ -853,12 +847,6 @@ rocprofsys_finalize_hidden(void)
|
||||
component::vaapi_gotcha::shutdown();
|
||||
}
|
||||
|
||||
if(get_use_ompt())
|
||||
{
|
||||
ROCPROFSYS_VERBOSE_F(1, "Shutting down OMPT...\n");
|
||||
ompt::shutdown();
|
||||
}
|
||||
|
||||
#if defined(ROCPROFSYS_USE_ROCM) && ROCPROFSYS_USE_ROCM > 0
|
||||
if(get_use_rocm())
|
||||
{
|
||||
|
||||
@@ -3,7 +3,6 @@ set(library_sources
|
||||
${CMAKE_CURRENT_LIST_DIR}/coverage.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/cpu_freq.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/kokkosp.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/ompt.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/perf.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/process_sampler.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/ptl.cpp
|
||||
@@ -17,7 +16,6 @@ set(library_sources
|
||||
set(library_headers
|
||||
${CMAKE_CURRENT_LIST_DIR}/coverage.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/cpu_freq.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/ompt.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/process_sampler.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/perf.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/ptl.hpp
|
||||
@@ -58,7 +56,6 @@ set(ndebug_sources
|
||||
${CMAKE_CURRENT_LIST_DIR}/components/backtrace_metrics.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/kokkosp.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/amd_smi.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/ompt.cpp
|
||||
)
|
||||
|
||||
set_source_files_properties(
|
||||
|
||||
@@ -1,539 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "api.hpp"
|
||||
#include "core/common.hpp"
|
||||
#include "core/config.hpp"
|
||||
#include "core/debug.hpp"
|
||||
#include "core/defines.hpp"
|
||||
|
||||
#include <timemory/defines.h>
|
||||
|
||||
#if defined(ROCPROFSYS_USE_OMPT) && ROCPROFSYS_USE_OMPT > 0
|
||||
|
||||
# include "binary/link_map.hpp"
|
||||
# include "core/components/fwd.hpp"
|
||||
# include "library/components/category_region.hpp"
|
||||
# include "library/tracing.hpp"
|
||||
|
||||
# include <timemory/components/ompt.hpp>
|
||||
# include <timemory/components/ompt/backends.hpp>
|
||||
# include <timemory/components/ompt/context.hpp>
|
||||
# include <timemory/components/ompt/context_handler.hpp>
|
||||
# include <timemory/components/ompt/extern.hpp>
|
||||
# include <timemory/components/ompt/tool.hpp>
|
||||
# include <timemory/mpl/type_traits.hpp>
|
||||
# include <timemory/timemory.hpp>
|
||||
# include <timemory/units.hpp>
|
||||
# include <timemory/unwind/addr2line.hpp>
|
||||
# include <timemory/utility/demangle.hpp>
|
||||
# include <timemory/utility/join.hpp>
|
||||
# include <timemory/utility/types.hpp>
|
||||
|
||||
# include <dlfcn.h>
|
||||
# include <memory>
|
||||
# include <sys/mman.h>
|
||||
# include <sys/types.h>
|
||||
|
||||
using api_t = tim::project::rocprofsys;
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace component
|
||||
{
|
||||
struct ompt : comp::base<ompt, void>
|
||||
{
|
||||
using value_type = void;
|
||||
using base_type = comp::base<ompt, void>;
|
||||
using context_info_t = tim::openmp::context_info;
|
||||
|
||||
static std::string label() { return "ompt"; }
|
||||
static std::string description() { return "OpenMP tools tracing"; }
|
||||
|
||||
ompt() = default;
|
||||
~ompt() = default;
|
||||
ompt(const ompt&) = default;
|
||||
ompt(ompt&&) noexcept = default;
|
||||
|
||||
ompt& operator=(const ompt&) = default;
|
||||
ompt& operator=(ompt&&) noexcept = default;
|
||||
|
||||
template <typename... Args>
|
||||
void start(const context_info_t& _ctx_info, Args&&...) const
|
||||
{
|
||||
category_region<category::ompt>::start<tim::quirk::timemory>(m_prefix);
|
||||
|
||||
auto _ts = tracing::now();
|
||||
uint64_t _cid =
|
||||
(_ctx_info.target_arguments) ? _ctx_info.target_arguments->host_op_id : 0;
|
||||
auto _annotate = [&](::perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(ctx, "begin_ns", _ts);
|
||||
for(const auto& itr : _ctx_info.arguments)
|
||||
tracing::add_perfetto_annotation(ctx, itr.label, itr.value);
|
||||
}
|
||||
};
|
||||
|
||||
if(_cid > 0)
|
||||
{
|
||||
category_region<category::ompt>::start<tim::quirk::perfetto>(
|
||||
(_ctx_info.func.empty()) ? m_prefix : _ctx_info.func, _ts,
|
||||
::perfetto::Flow::ProcessScoped(_cid), std::move(_annotate));
|
||||
}
|
||||
else
|
||||
{
|
||||
category_region<category::ompt>::start<tim::quirk::perfetto>(
|
||||
(_ctx_info.func.empty()) ? m_prefix : _ctx_info.func, _ts,
|
||||
std::move(_annotate));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
void stop(const context_info_t& _ctx_info, Args&&...) const
|
||||
{
|
||||
category_region<category::ompt>::stop<tim::quirk::timemory>(m_prefix);
|
||||
|
||||
auto _ts = tracing::now();
|
||||
uint64_t _cid =
|
||||
(_ctx_info.target_arguments) ? _ctx_info.target_arguments->host_op_id : 0;
|
||||
auto _annotate = [&](::perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(ctx, "end_ns", _ts);
|
||||
for(const auto& itr : _ctx_info.arguments)
|
||||
tracing::add_perfetto_annotation(ctx, itr.label, itr.value);
|
||||
}
|
||||
};
|
||||
|
||||
if(_cid > 0)
|
||||
{
|
||||
category_region<category::ompt>::stop<tim::quirk::perfetto>(
|
||||
(_ctx_info.func.empty()) ? m_prefix : _ctx_info.func, _ts,
|
||||
std::move(_annotate));
|
||||
}
|
||||
else
|
||||
{
|
||||
category_region<category::ompt>::stop<tim::quirk::perfetto>(
|
||||
(_ctx_info.func.empty()) ? m_prefix : _ctx_info.func, _ts,
|
||||
std::move(_annotate));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
void store(const context_info_t& _ctx_info, Args&&... _args) const
|
||||
{
|
||||
start(_ctx_info, std::forward<Args>(_args)...);
|
||||
stop(_ctx_info, std::forward<Args>(_args)...);
|
||||
}
|
||||
|
||||
static void record(std::string_view name, ompt_id_t id, uint64_t beg_time,
|
||||
uint64_t end_time, uint64_t thrd_id, uint64_t targ_id,
|
||||
const context_info_t& common)
|
||||
{
|
||||
(void) thrd_id;
|
||||
(void) targ_id;
|
||||
|
||||
auto _annotate = [&](::perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
for(const auto& itr : common.arguments)
|
||||
tracing::add_perfetto_annotation(ctx, itr.label, itr.value);
|
||||
}
|
||||
};
|
||||
|
||||
auto _track = tracing::get_perfetto_track(
|
||||
category::ompt{},
|
||||
[](auto) -> std::string { return "OpenMP Target Offloads"; }, 0);
|
||||
|
||||
category_region<category::ompt>::start<tim::quirk::perfetto>(
|
||||
name, _track, beg_time, ::perfetto::Flow::ProcessScoped(id),
|
||||
std::move(_annotate));
|
||||
|
||||
category_region<category::ompt>::stop<tim::quirk::perfetto>(name, _track,
|
||||
end_time);
|
||||
}
|
||||
|
||||
void set_prefix(std::string_view _v) { m_prefix = _v; }
|
||||
|
||||
private:
|
||||
std::string_view m_prefix = {};
|
||||
};
|
||||
} // namespace component
|
||||
} // namespace rocprofsys
|
||||
|
||||
namespace tim
|
||||
{
|
||||
namespace trait
|
||||
{
|
||||
template <>
|
||||
struct ompt_handle<api_t>
|
||||
{
|
||||
using type = component_tuple<::rocprofsys::component::ompt>;
|
||||
};
|
||||
} // namespace trait
|
||||
} // namespace tim
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace ompt
|
||||
{
|
||||
namespace
|
||||
{
|
||||
using ompt_handle_t = tim::component::ompt_handle<api_t>;
|
||||
using ompt_context_t = tim::openmp::context_handler<api_t>;
|
||||
using ompt_toolset_t = typename ompt_handle_t::toolset_type;
|
||||
using ompt_bundle_t = tim::component_tuple<ompt_handle_t>;
|
||||
|
||||
std::unique_ptr<ompt_bundle_t> f_bundle = {};
|
||||
bool _init_toolset_off = (trait::runtime_enabled<ompt_toolset_t>::set(false),
|
||||
trait::runtime_enabled<ompt_context_t>::set(false), true);
|
||||
tim::ompt::finalize_tool_func_t f_finalize = nullptr;
|
||||
} // namespace
|
||||
|
||||
void
|
||||
setup()
|
||||
{
|
||||
if(!tim::settings::enabled()) return;
|
||||
trait::runtime_enabled<ompt_toolset_t>::set(true);
|
||||
trait::runtime_enabled<ompt_context_t>::set(true);
|
||||
tim::auto_lock_t lk{ tim::type_mutex<ompt_handle_t>() };
|
||||
f_bundle = std::make_unique<ompt_bundle_t>("rocprofsys/ompt",
|
||||
quirk::config<quirk::auto_start>{});
|
||||
}
|
||||
|
||||
void
|
||||
shutdown()
|
||||
{
|
||||
static bool _protect = false;
|
||||
if(_protect) return;
|
||||
_protect = true;
|
||||
if(f_bundle)
|
||||
{
|
||||
if(tim::manager::instance()) tim::manager::instance()->cleanup("rocprofsys-ompt");
|
||||
f_bundle->stop();
|
||||
ompt_context_t::cleanup();
|
||||
trait::runtime_enabled<ompt_toolset_t>::set(false);
|
||||
trait::runtime_enabled<ompt_context_t>::set(false);
|
||||
pthread_gotcha::shutdown();
|
||||
// call the OMPT finalize callback
|
||||
if(f_finalize)
|
||||
{
|
||||
for(const auto& itr : tim::openmp::get_ompt_device_functions<api_t>())
|
||||
if(itr.second.stop_trace) itr.second.stop_trace(itr.second.device);
|
||||
(*f_finalize)();
|
||||
f_finalize = nullptr;
|
||||
}
|
||||
}
|
||||
f_bundle.reset();
|
||||
_protect = false;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
bool&
|
||||
use_tool()
|
||||
{
|
||||
static bool _v = false;
|
||||
return _v;
|
||||
}
|
||||
|
||||
int
|
||||
tool_initialize(ompt_function_lookup_t lookup, int initial_device_num,
|
||||
ompt_data_t* tool_data)
|
||||
{
|
||||
if(!rocprofsys::settings_are_configured())
|
||||
{
|
||||
ROCPROFSYS_BASIC_WARNING_F(
|
||||
0,
|
||||
"[%s] invoked before rocprof-sys was initialized. In instrumentation mode, "
|
||||
"settings exported to the environment have not been propagated yet...\n",
|
||||
__FUNCTION__);
|
||||
use_tool() = get_env("ROCPROFSYS_USE_OMPT", true, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
use_tool() = rocprofsys::config::get_use_ompt();
|
||||
}
|
||||
|
||||
if(use_tool())
|
||||
{
|
||||
ROCPROFSYS_BASIC_VERBOSE_F(
|
||||
2, "OpenMP-tools configuring for initial device %i\n\n", initial_device_num);
|
||||
|
||||
static auto _generate_key = [](std::string_view _key_v,
|
||||
const ::tim::openmp::argument_array_t& _args_v) {
|
||||
return std::string{ _key_v };
|
||||
(void) _args_v;
|
||||
};
|
||||
|
||||
tim::openmp::get_codeptr_ra_resolver<api_t>() =
|
||||
[](tim::openmp::context_info& _ctx_info) {
|
||||
const auto& _key = _ctx_info.label;
|
||||
const auto* codeptr_ra = _ctx_info.codeptr_ra;
|
||||
auto& _args = _ctx_info.arguments;
|
||||
|
||||
ROCPROFSYS_BASIC_VERBOSE(2, "resolving codeptr return address for %s\n",
|
||||
_key.data());
|
||||
|
||||
if(!codeptr_ra) return _generate_key(_key, _args);
|
||||
|
||||
static thread_local auto _once = std::once_flag{};
|
||||
std::call_once(_once, []() { ::tim::unwind::update_file_maps(); });
|
||||
|
||||
auto _info = ::rocprofsys::binary::lookup_ipaddr_entry<false>(
|
||||
reinterpret_cast<uintptr_t>(codeptr_ra));
|
||||
|
||||
if(_info)
|
||||
{
|
||||
_ctx_info.func = tim::demangle(_info->name);
|
||||
if(_info->lineno > 0)
|
||||
{
|
||||
auto _linfo = _info->lineinfo.rget([](const auto& _v) -> bool {
|
||||
return (_v && !_v.location.empty() && _v.line > 0);
|
||||
});
|
||||
|
||||
if(_linfo)
|
||||
{
|
||||
_ctx_info.file = _linfo.location;
|
||||
_ctx_info.line = _linfo.line;
|
||||
_args.emplace_back("file", _ctx_info.file);
|
||||
_args.emplace_back("lineinfo",
|
||||
::timemory::join::join("@", _ctx_info.file,
|
||||
_ctx_info.line));
|
||||
}
|
||||
else
|
||||
{
|
||||
_ctx_info.file = _info->location;
|
||||
_args.emplace_back("file", _ctx_info.file);
|
||||
}
|
||||
|
||||
return _generate_key(
|
||||
::timemory::join::join(" @ ", _key, _ctx_info.func), _args);
|
||||
}
|
||||
else
|
||||
{
|
||||
return _generate_key(
|
||||
::timemory::join::join(" @ ", _key, _ctx_info.func), _args);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto _dl_info = Dl_info{ nullptr, nullptr, nullptr, nullptr };
|
||||
if(dladdr(codeptr_ra, &_dl_info) != 0)
|
||||
{
|
||||
_ctx_info.file = _dl_info.dli_fname;
|
||||
_ctx_info.func = tim::demangle(_dl_info.dli_sname);
|
||||
_args.emplace_back("file", _ctx_info.file);
|
||||
return _generate_key(
|
||||
::timemory::join::join(
|
||||
" @ ", _key,
|
||||
::timemory::join::join("", _ctx_info.func, " [",
|
||||
_ctx_info.file, "]")),
|
||||
_args);
|
||||
}
|
||||
}
|
||||
|
||||
// since no line info could be deduced, include the codeptr return address
|
||||
auto _args_codeptr_v = _args;
|
||||
_args_codeptr_v.emplace_back("codeptr_ra", codeptr_ra);
|
||||
return _generate_key(_key, _args_codeptr_v);
|
||||
};
|
||||
|
||||
tim::openmp::get_function_lookup_callback<
|
||||
api_t>() = [](ompt_function_lookup_t,
|
||||
const std::optional<tim::openmp::function_lookup_params>&
|
||||
params) {
|
||||
if(!params) return;
|
||||
|
||||
ROCPROFSYS_VERBOSE(3, "[ompt] configuring device %i...\n",
|
||||
params->device_num);
|
||||
|
||||
auto& device_funcs =
|
||||
tim::openmp::get_ompt_device_functions<api_t>().at(params->device_num);
|
||||
|
||||
device_funcs.set_trace_ompt(params->device, 1, ompt_callback_target_data_op);
|
||||
device_funcs.set_trace_ompt(params->device, 1, ompt_callback_target_submit);
|
||||
|
||||
static ompt_callback_buffer_request_t request =
|
||||
[](int device_num, ompt_buffer_t** buffer, size_t* bytes) {
|
||||
ROCPROFSYS_VERBOSE(3, "[ompt] buffer request...\n");
|
||||
*bytes = ::tim::units::get_page_size();
|
||||
*buffer = mmap(nullptr, *bytes, PROT_READ | PROT_WRITE,
|
||||
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
(void) device_num;
|
||||
};
|
||||
|
||||
static ompt_callback_buffer_complete_t complete = [](int device_num,
|
||||
ompt_buffer_t* buffer,
|
||||
size_t bytes,
|
||||
ompt_buffer_cursor_t
|
||||
begin,
|
||||
int buffer_owned) {
|
||||
ROCPROFSYS_VERBOSE(3, "[ompt] buffer complete...\n");
|
||||
tim::consume_parameters(device_num, buffer, bytes, begin, buffer_owned);
|
||||
|
||||
auto _funcs =
|
||||
tim::openmp::get_ompt_device_functions<api_t>().at(device_num);
|
||||
auto _skew = rocprofsys::tracing::get_clock_skew(
|
||||
[&_funcs]() { return _funcs.get_device_time(_funcs.device); });
|
||||
|
||||
ompt_buffer_cursor_t _cursor = begin;
|
||||
size_t _nrecords = 0;
|
||||
do
|
||||
{
|
||||
if(_cursor == 0) break;
|
||||
++_nrecords;
|
||||
auto* _record = _funcs.get_record_ompt(buffer, _cursor);
|
||||
if(_record)
|
||||
{
|
||||
const char* _type = tim::openmp::get_enum_label(_record->type);
|
||||
auto _thrd_id = _record->thread_id;
|
||||
auto _targ_id = _record->target_id;
|
||||
|
||||
unsigned long beg_time = _record->time + _skew;
|
||||
unsigned long end_time = 0;
|
||||
ompt_id_t id = 0;
|
||||
const char* _name = tim::openmp::get_enum_label(_record->type);
|
||||
|
||||
if(_record->type == ompt_callback_target_submit)
|
||||
{
|
||||
auto& _data = _record->record.target_kernel;
|
||||
end_time = _data.end_time + _skew;
|
||||
id = _data.host_op_id;
|
||||
|
||||
auto _ctx_info = tim::openmp::argument_array_t{
|
||||
{ "begin_ns", beg_time },
|
||||
{ "end_ns", end_time },
|
||||
{ "type", _type },
|
||||
{ "thread_id", _thrd_id },
|
||||
{ "target_id", _targ_id },
|
||||
{ "host_op_id", id },
|
||||
{ "requested_num_teams", _data.requested_num_teams },
|
||||
{ "granted_num_teams", _data.granted_num_teams }
|
||||
};
|
||||
|
||||
component::ompt::record(
|
||||
_name, id, beg_time, end_time, _thrd_id, _targ_id,
|
||||
tim::openmp::context_info{ _name, nullptr, _ctx_info });
|
||||
}
|
||||
else if(_record->type == ompt_callback_target_data_op)
|
||||
{
|
||||
auto& _data = _record->record.target_data_op;
|
||||
end_time = _data.end_time + _skew;
|
||||
id = _data.host_op_id;
|
||||
const auto* _opname =
|
||||
tim::openmp::get_enum_label(_data.optype);
|
||||
|
||||
auto _ctx_info = tim::openmp::argument_array_t{
|
||||
{ "begin_ns", beg_time },
|
||||
{ "end_ns", end_time },
|
||||
{ "type", _type },
|
||||
{ "thread_id", _thrd_id },
|
||||
{ "target_id", _targ_id },
|
||||
{ "host_op_id", id },
|
||||
{ "optype", _opname },
|
||||
{ "src_addr", reinterpret_cast<void*>(_data.src_addr) },
|
||||
{ "dst_addr", reinterpret_cast<void*>(_data.dest_addr) },
|
||||
{ "src_device_num", _data.src_device_num },
|
||||
{ "dst_device_num", _data.dest_device_num },
|
||||
{ "bytes", _data.bytes },
|
||||
};
|
||||
|
||||
component::ompt::record(
|
||||
_opname, id, beg_time, end_time, _thrd_id, _targ_id,
|
||||
tim::openmp::context_info{ _name, nullptr, _ctx_info });
|
||||
}
|
||||
|
||||
ROCPROFSYS_VERBOSE(
|
||||
3,
|
||||
"type=%i, type_name=%s, start=%lu, end=%lu, delta=%lu, "
|
||||
"tid=%lu, target_id=%lu, host_id=%lu\n",
|
||||
_record->type, tim::openmp::get_enum_label(_record->type),
|
||||
beg_time, end_time, (end_time - beg_time), _record->thread_id,
|
||||
_record->target_id, id);
|
||||
}
|
||||
|
||||
_funcs.advance_buffer_cursor(_funcs.device, buffer, bytes, _cursor,
|
||||
&_cursor);
|
||||
} while(_cursor != 0);
|
||||
|
||||
ROCPROFSYS_VERBOSE(3, "[ompt] number of records: %zu\n", _nrecords);
|
||||
|
||||
if(buffer_owned == 1)
|
||||
{
|
||||
::munmap(buffer, bytes);
|
||||
}
|
||||
};
|
||||
|
||||
device_funcs.start_trace(params->device, request, complete);
|
||||
};
|
||||
|
||||
f_finalize = tim::ompt::configure<api_t>(lookup, initial_device_num, tool_data);
|
||||
}
|
||||
return 1; // success
|
||||
}
|
||||
|
||||
void
|
||||
tool_finalize(ompt_data_t*)
|
||||
{
|
||||
shutdown();
|
||||
}
|
||||
} // namespace
|
||||
} // namespace ompt
|
||||
} // namespace rocprofsys
|
||||
|
||||
extern "C"
|
||||
{
|
||||
ompt_start_tool_result_t* ompt_start_tool(unsigned int,
|
||||
const char*) ROCPROFSYS_PUBLIC_API;
|
||||
|
||||
ompt_start_tool_result_t* ompt_start_tool(unsigned int omp_version,
|
||||
const char* runtime_version)
|
||||
{
|
||||
ROCPROFSYS_BASIC_VERBOSE_F(0, "OpenMP version: %u, runtime version: %s\n",
|
||||
omp_version, runtime_version);
|
||||
ROCPROFSYS_METADATA("OMP_VERSION", omp_version);
|
||||
ROCPROFSYS_METADATA("OMP_RUNTIME_VERSION", runtime_version);
|
||||
static auto* data = new ompt_start_tool_result_t{
|
||||
&rocprofsys::ompt::tool_initialize, &rocprofsys::ompt::tool_finalize, { 0 }
|
||||
};
|
||||
return data;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace ompt
|
||||
{
|
||||
void
|
||||
setup()
|
||||
{}
|
||||
|
||||
void
|
||||
shutdown()
|
||||
{}
|
||||
} // namespace ompt
|
||||
} // namespace rocprofsys
|
||||
|
||||
#endif
|
||||
@@ -1,35 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace ompt
|
||||
{
|
||||
void
|
||||
setup();
|
||||
|
||||
void
|
||||
shutdown();
|
||||
} // namespace ompt
|
||||
} // namespace rocprofsys
|
||||
+271
-33
@@ -903,14 +903,189 @@ get_kernel_dispatch_timestamps()
|
||||
return _v;
|
||||
}
|
||||
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
// To handle events without finalization, perfetto push must occur in start
|
||||
// Allows capture of worker thread implicit and sync tasks
|
||||
void
|
||||
ompt_tracing_callback_start(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t* /*user_data*/,
|
||||
rocprofiler_timestamp_t ts)
|
||||
{
|
||||
static bool is_first_implicit_call = true;
|
||||
|
||||
// Ignore first ompt_implicit_call as this is created after runtime initialization but
|
||||
// before first region
|
||||
// Respective end is also not received due to finalization occurring too late
|
||||
if(is_first_implicit_call && (record.kind == ROCPROFILER_CALLBACK_TRACING_OMPT &&
|
||||
record.operation == ROCPROFILER_OMPT_ID_implicit_task))
|
||||
{
|
||||
is_first_implicit_call = false;
|
||||
return;
|
||||
}
|
||||
|
||||
std::string_view _name =
|
||||
tool_data->callback_tracing_info.at(record.kind, record.operation);
|
||||
|
||||
// Forces omp_parallel begin and end to have same name, allowing perfetto track to
|
||||
// connect. This will be changed in the future
|
||||
if(record.operation == ROCPROFILER_OMPT_ID_parallel_begin) _name = "omp_parallel";
|
||||
// Although not necessary to connect them, this forces a unified name instead of
|
||||
// the whole track being named omp_lock_init
|
||||
if(record.operation == ROCPROFILER_OMPT_ID_lock_init) _name = "omp_lock";
|
||||
|
||||
if(get_use_timemory())
|
||||
{
|
||||
component::category_region<category::rocm_marker_api>::start<quirk::timemory>(
|
||||
_name);
|
||||
}
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
auto args = callback_arg_array_t{};
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, 1,
|
||||
&args);
|
||||
}
|
||||
|
||||
uint64_t _beg_ts = ts;
|
||||
auto stream_id = stream_id_top();
|
||||
|
||||
tracing::push_perfetto_ts(
|
||||
category::rocm_ompt_api{}, _name.data(), _beg_ts,
|
||||
::perfetto::Flow::ProcessScoped(record.correlation_id.internal),
|
||||
[&](::perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(ctx, "begin_ns", _beg_ts);
|
||||
tracing::add_perfetto_annotation(ctx, "corr_id",
|
||||
record.correlation_id.internal);
|
||||
if(stream_id.handle != 0)
|
||||
tracing::add_perfetto_annotation(ctx, "stream_id",
|
||||
stream_id.handle);
|
||||
for(const auto& [key, val] : args)
|
||||
{
|
||||
tracing::add_perfetto_annotation(ctx, key, val);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ompt_tracing_callback_stop(
|
||||
rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t* /*user_data*/,
|
||||
rocprofiler_timestamp_t ts,
|
||||
std::optional<std::vector<tim::unwind::processed_entry>>& _bt_data)
|
||||
{
|
||||
std::string_view _name =
|
||||
tool_data->callback_tracing_info.at(record.kind, record.operation);
|
||||
|
||||
// Forces omp_parallel begin and end to have same name, allowing perfetto track to
|
||||
// connect. This will be changed in the future
|
||||
if(record.operation == ROCPROFILER_OMPT_ID_parallel_end) _name = "omp_parallel";
|
||||
// Although not necessary to connect them, this forces a unified name instead of
|
||||
// the whole track being named omp_lock_init
|
||||
if(record.operation == ROCPROFILER_OMPT_ID_lock_destroy) _name = "omp_lock";
|
||||
|
||||
if(get_use_timemory())
|
||||
{
|
||||
component::category_region<category::rocm_marker_api>::stop<quirk::timemory>(
|
||||
_name);
|
||||
}
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
auto args = callback_arg_array_t{};
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, 2,
|
||||
&args);
|
||||
}
|
||||
uint64_t _end_ts = ts;
|
||||
tracing::pop_perfetto_ts(
|
||||
category::rocm_ompt_api{}, _name.data(), _end_ts,
|
||||
[&](::perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
tracing::add_perfetto_annotation(ctx, "end_ns", _end_ts);
|
||||
if(_bt_data && !_bt_data->empty())
|
||||
{
|
||||
const std::string _unk = "??";
|
||||
size_t _bt_cnt = 0;
|
||||
for(const auto& itr : *_bt_data)
|
||||
{
|
||||
auto _linfo = itr.lineinfo.get();
|
||||
const auto* _func = (itr.name.empty()) ? &_unk : &itr.name;
|
||||
const auto* _loc =
|
||||
(_linfo && !_linfo.location.empty())
|
||||
? &_linfo.location
|
||||
: ((itr.location.empty()) ? &_unk : &itr.location);
|
||||
auto _line = (_linfo && _linfo.line > 0)
|
||||
? join("", _linfo.line)
|
||||
: ((itr.lineno == 0) ? std::string{ "?" }
|
||||
: join("", itr.lineno));
|
||||
auto _entry = join("", demangle(*_func), " @ ",
|
||||
join(':', ::basename(_loc->c_str()), _line));
|
||||
if(_bt_cnt < 10)
|
||||
{
|
||||
// Prepend zero for better ordering in UI. Only one zero
|
||||
// is ever necessary since stack depth is limited to 16.
|
||||
tracing::add_perfetto_annotation(
|
||||
ctx, join("", "frame#0", _bt_cnt++), _entry);
|
||||
}
|
||||
else
|
||||
{
|
||||
tracing::add_perfetto_annotation(
|
||||
ctx, join("", "frame#", _bt_cnt++), _entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void
|
||||
tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t* user_data, void* /*callback_data*/)
|
||||
{
|
||||
using backtrace_entry_vec_t = std::vector<tim::unwind::processed_entry>;
|
||||
auto _bt_data = std::optional<backtrace_entry_vec_t>{};
|
||||
auto populate_backtrace_data = [&]() {
|
||||
constexpr size_t backtrace_stack_depth = 16;
|
||||
constexpr size_t backtrace_ignore_depth = 3;
|
||||
constexpr bool backtrace_with_signal_frame = true;
|
||||
auto use_perfetto =
|
||||
(config::get_use_perfetto() && config::get_perfetto_annotations());
|
||||
auto use_rocpd = config::get_use_rocpd();
|
||||
|
||||
if((use_perfetto || use_rocpd) &&
|
||||
tool_data->backtrace_operations.at(record.kind).count(record.operation) > 0)
|
||||
{
|
||||
auto _backtrace =
|
||||
tim::get_unw_stack<backtrace_stack_depth, backtrace_ignore_depth,
|
||||
backtrace_with_signal_frame>();
|
||||
_bt_data = backtrace_entry_vec_t{};
|
||||
_bt_data->reserve(_backtrace.size());
|
||||
for(auto itr : _backtrace)
|
||||
{
|
||||
if(itr)
|
||||
{
|
||||
if(auto _val = binary::lookup_ipaddr_entry<false>(itr->address());
|
||||
_val)
|
||||
{
|
||||
_bt_data->emplace_back(std::move(*_val));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
auto ts = rocprofiler_timestamp_t{};
|
||||
ROCPROFILER_CALL(rocprofiler_get_timestamp(&ts));
|
||||
const char* name = "";
|
||||
|
||||
const char* name = nullptr;
|
||||
rocprofiler_query_callback_tracing_kind_operation_name(record.kind, record.operation,
|
||||
&name, nullptr);
|
||||
|
||||
@@ -921,6 +1096,13 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
<< ", phase=" << record.phase << ", dt_nsec=" << std::setw(8) << ts
|
||||
<< ", name=" << name;
|
||||
|
||||
if(rocprofsys::get_state() != rocprofsys::State::Active)
|
||||
{
|
||||
ROCPROFSYS_WARNING_F(0, "Callback called when tool is not active.\n\t%s\n",
|
||||
info.str().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
|
||||
{
|
||||
user_data->value = ts;
|
||||
@@ -949,6 +1131,11 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
break;
|
||||
}
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
case ROCPROFILER_CALLBACK_TRACING_OMPT:
|
||||
{
|
||||
ompt_tracing_callback_start(record, user_data, ts);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API:
|
||||
{
|
||||
tool_tracing_callback_start(category::rocm_rocdecode_api{}, record,
|
||||
@@ -979,7 +1166,7 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
case ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH:
|
||||
case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY:
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
case ROCPROFILER_CALLBACK_TRACING_OMPT:
|
||||
case ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION:
|
||||
case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION:
|
||||
#endif
|
||||
#if(ROCPROFILER_VERSION >= 700)
|
||||
@@ -1000,36 +1187,7 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
}
|
||||
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
|
||||
{
|
||||
using backtrace_entry_vec_t = std::vector<tim::unwind::processed_entry>;
|
||||
|
||||
constexpr size_t bt_stack_depth = 16;
|
||||
constexpr size_t bt_ignore_depth = 3;
|
||||
constexpr bool bt_with_signal_frame = true;
|
||||
|
||||
auto _bt_data = std::optional<backtrace_entry_vec_t>{};
|
||||
auto use_perfetto =
|
||||
(config::get_use_perfetto() && config::get_perfetto_annotations());
|
||||
auto use_rocpd = config::get_use_rocpd();
|
||||
|
||||
if((use_perfetto || use_rocpd) &&
|
||||
tool_data->backtrace_operations.at(record.kind).count(record.operation) > 0)
|
||||
{
|
||||
auto _backtrace = tim::get_unw_stack<bt_stack_depth, bt_ignore_depth,
|
||||
bt_with_signal_frame>();
|
||||
_bt_data = backtrace_entry_vec_t{};
|
||||
_bt_data->reserve(_backtrace.size());
|
||||
for(auto itr : _backtrace)
|
||||
{
|
||||
if(itr)
|
||||
{
|
||||
if(auto _val = binary::lookup_ipaddr_entry<false>(itr->address());
|
||||
_val)
|
||||
{
|
||||
_bt_data->emplace_back(std::move(*_val));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
populate_backtrace_data();
|
||||
|
||||
switch(record.kind)
|
||||
{
|
||||
@@ -1056,6 +1214,11 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
break;
|
||||
}
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
case ROCPROFILER_CALLBACK_TRACING_OMPT:
|
||||
{
|
||||
ompt_tracing_callback_stop(record, user_data, ts, _bt_data);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API:
|
||||
{
|
||||
tool_tracing_callback_stop(category::rocm_rocdecode_api{}, record,
|
||||
@@ -1087,7 +1250,7 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
case ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH:
|
||||
case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY:
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
case ROCPROFILER_CALLBACK_TRACING_OMPT:
|
||||
case ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION:
|
||||
case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION:
|
||||
#endif
|
||||
#if(ROCPROFILER_VERSION >= 700)
|
||||
@@ -1125,6 +1288,77 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
}
|
||||
}
|
||||
break;
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
case ROCPROFILER_CALLBACK_TRACING_OMPT:
|
||||
{
|
||||
// Callbacks that are received but that we do not process
|
||||
static const std::set<rocprofiler_ompt_operation_t> ompt_no_process = {
|
||||
ROCPROFILER_OMPT_ID_callback_functions, // "Fake" callback
|
||||
// There is no point in handling ompt_thread_begin events as the
|
||||
// corresponding ompt_thread_end event will not occur unless
|
||||
// runtime is finalized earlier
|
||||
ROCPROFILER_OMPT_ID_thread_begin,
|
||||
ROCPROFILER_OMPT_ID_thread_end,
|
||||
};
|
||||
|
||||
auto ompt_operation_type =
|
||||
static_cast<rocprofiler_ompt_operation_t>(record.operation);
|
||||
if(ompt_no_process.find(ompt_operation_type) != ompt_no_process.end())
|
||||
return;
|
||||
|
||||
populate_backtrace_data();
|
||||
|
||||
switch(ompt_operation_type)
|
||||
{
|
||||
case ROCPROFILER_OMPT_ID_parallel_begin:
|
||||
ompt_tracing_callback_start(record, user_data, ts);
|
||||
break;
|
||||
case ROCPROFILER_OMPT_ID_parallel_end:
|
||||
ompt_tracing_callback_stop(record, user_data, ts, _bt_data);
|
||||
break;
|
||||
case ROCPROFILER_OMPT_ID_lock_init:
|
||||
ompt_tracing_callback_start(record, user_data, ts);
|
||||
break;
|
||||
case ROCPROFILER_OMPT_ID_lock_destroy:
|
||||
ompt_tracing_callback_stop(record, user_data, ts, _bt_data);
|
||||
break;
|
||||
// Although this has endpoint arg, treat it as instant event
|
||||
case ROCPROFILER_OMPT_ID_nest_lock:
|
||||
case ROCPROFILER_OMPT_ID_dispatch:
|
||||
case ROCPROFILER_OMPT_ID_flush:
|
||||
case ROCPROFILER_OMPT_ID_cancel:
|
||||
case ROCPROFILER_OMPT_ID_device_initialize:
|
||||
case ROCPROFILER_OMPT_ID_device_finalize:
|
||||
case ROCPROFILER_OMPT_ID_device_load:
|
||||
// case ROCPROFILER_OMPT_ID_device_unload: // Unsupported by runtime
|
||||
case ROCPROFILER_OMPT_ID_task_create:
|
||||
case ROCPROFILER_OMPT_ID_task_schedule:
|
||||
case ROCPROFILER_OMPT_ID_mutex_released:
|
||||
case ROCPROFILER_OMPT_ID_mutex_acquire:
|
||||
case ROCPROFILER_OMPT_ID_mutex_acquired:
|
||||
case ROCPROFILER_OMPT_ID_dependences:
|
||||
case ROCPROFILER_OMPT_ID_task_dependence:
|
||||
case ROCPROFILER_OMPT_ID_error:
|
||||
{
|
||||
// These callbacks are considered instant events and should start
|
||||
// and immediately call stop as no corresponding "end" will be
|
||||
// received
|
||||
ompt_tracing_callback_start(record, user_data, ts);
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_get_timestamp(&ts)); // Set artificial end ts
|
||||
ompt_tracing_callback_stop(record, user_data, ts, _bt_data);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ROCPROFSYS_WARNING_F(
|
||||
1,
|
||||
"tool_tracing_callback: unhandled PHASE_NONE "
|
||||
"callback record\n\t%s\n",
|
||||
info.str().c_str());
|
||||
}
|
||||
}
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
{
|
||||
ROCPROFSYS_WARNING_F(1,
|
||||
@@ -1138,6 +1372,9 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
else
|
||||
{
|
||||
ROCPROFSYS_CI_ABORT(true, "unhandled callback record phase: %i\n", record.phase);
|
||||
ROCPROFSYS_WARNING_F(1,
|
||||
"tool_tracing_callback: unhandled callback record\n\t%s\n",
|
||||
info.str().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1706,6 +1943,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
|
||||
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_RCCL_API,
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
ROCPROFILER_CALLBACK_TRACING_OMPT,
|
||||
ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API,
|
||||
#endif
|
||||
#if(ROCPROFILER_VERSION >= 700)
|
||||
|
||||
@@ -51,7 +51,7 @@ if(NOT EXISTS "${_rocm_llvm_lib}/libomptarget.so" AND ROCPROFSYS_USE_ROCM)
|
||||
endif()
|
||||
|
||||
if(ROCPROFSYS_OPENMP_USING_LIBOMP_LIBRARY AND ROCPROFSYS_USE_OMPT)
|
||||
set(_OMPT_PASS_REGEX "\\|_ompt_")
|
||||
set(_OMPT_PASS_REGEX "\\|_omp_")
|
||||
else()
|
||||
set(_OMPT_PASS_REGEX "")
|
||||
endif()
|
||||
|
||||
Atsaukties uz šo jaunā problēmā
Block a user