From d22725e830b8d4bd9ca59ccba9bfaf699efa09b5 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Mon, 18 Jul 2022 02:06:40 -0500 Subject: [PATCH] Support ACTIVITY_DOMAIN_ROCTX (#87) - New configuration variable: OMNITRACE_USE_ROCTX - Enable support for roctxRangePushA, roctxRangePop, roctxRangeStartA, roctxRangeStop --- .../lib/omnitrace/library/components/fwd.hpp | 2 + .../library/components/roctracer.cpp | 7 +- source/lib/omnitrace/library/config.cpp | 16 ++++ source/lib/omnitrace/library/config.hpp | 3 + source/lib/omnitrace/library/perfetto.hpp | 1 + source/lib/omnitrace/library/roctracer.cpp | 87 +++++++++++++++++++ source/lib/omnitrace/library/roctracer.hpp | 3 + 7 files changed, 117 insertions(+), 2 deletions(-) diff --git a/source/lib/omnitrace/library/components/fwd.hpp b/source/lib/omnitrace/library/components/fwd.hpp index 836370eff3..c21014fa1e 100644 --- a/source/lib/omnitrace/library/components/fwd.hpp +++ b/source/lib/omnitrace/library/components/fwd.hpp @@ -69,6 +69,7 @@ TIMEMORY_DEFINE_NS_API(category, device_hsa) TIMEMORY_DEFINE_NS_API(category, rocm_hip) TIMEMORY_DEFINE_NS_API(category, rocm_hsa) TIMEMORY_DEFINE_NS_API(category, rocm_smi) +TIMEMORY_DEFINE_NS_API(category, rocm_roctx) TIMEMORY_DEFINE_NS_API(category, kokkos) TIMEMORY_DEFINE_NS_API(category, mpi) TIMEMORY_DEFINE_NS_API(category, ompt) @@ -84,6 +85,7 @@ TIMEMORY_DEFINE_NAME_TRAIT("user", category::user); TIMEMORY_DEFINE_NAME_TRAIT("rocm_hip", category::rocm_hip); TIMEMORY_DEFINE_NAME_TRAIT("rocm_hsa", category::rocm_hsa); TIMEMORY_DEFINE_NAME_TRAIT("rocm_smi", category::rocm_smi); +TIMEMORY_DEFINE_NAME_TRAIT("rocm_roctx", category::rocm_roctx); TIMEMORY_DEFINE_NAME_TRAIT("sampling", category::sampling); TIMEMORY_DEFINE_NAME_TRAIT("thread_sampling", category::thread_sampling); TIMEMORY_DEFINE_NAME_TRAIT("kokkos", category::kokkos); diff --git a/source/lib/omnitrace/library/components/roctracer.cpp b/source/lib/omnitrace/library/components/roctracer.cpp index 
bd8f62fbb2..905a3246e5 100644 --- a/source/lib/omnitrace/library/components/roctracer.cpp +++ b/source/lib/omnitrace/library/components/roctracer.cpp @@ -150,8 +150,11 @@ roctracer::setup() ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, nullptr)); - // ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, - // hip_api_callback, nullptr)); + if(get_use_roctx()) + { + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, + roctx_api_callback, nullptr)); + } // Enable HIP activity tracing ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS)); diff --git a/source/lib/omnitrace/library/config.cpp b/source/lib/omnitrace/library/config.cpp index 86d4fa971e..6542fa04d3 100644 --- a/source/lib/omnitrace/library/config.cpp +++ b/source/lib/omnitrace/library/config.cpp @@ -228,6 +228,11 @@ configure_settings(bool _init) "Enable sampling GPU power, temp, utilization, and memory usage", true, "backend", "rocm_smi", "rocm"); + OMNITRACE_CONFIG_SETTING( + bool, "OMNITRACE_USE_ROCTX", + "Enable ROCtx API. Warning! 
Out-of-order ranges may corrupt perfetto flamegraph",
+        false, "backend", "roctracer", "rocm", "roctx");
+
     OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_USE_SAMPLING",
                              "Enable statistical sampling of call-stack", false,
                              "backend", "sampling");
@@ -1198,6 +1203,17 @@ get_use_rocm_smi()
 #endif
 }
 
+bool
+get_use_roctx()
+{
+#if defined(OMNITRACE_USE_ROCTRACER) && OMNITRACE_USE_ROCTRACER > 0
+    static auto _v = get_config()->find("OMNITRACE_USE_ROCTX");
+    return static_cast&>(*_v->second).get();
+#else
+    return false;
+#endif
+}
+
 bool&
 get_use_sampling()
 {
diff --git a/source/lib/omnitrace/library/config.hpp b/source/lib/omnitrace/library/config.hpp
index 49835430c8..4333e19277 100644
--- a/source/lib/omnitrace/library/config.hpp
+++ b/source/lib/omnitrace/library/config.hpp
@@ -174,6 +174,9 @@ get_use_rocprofiler() OMNITRACE_HOT;
 bool
 get_use_rocm_smi() OMNITRACE_HOT;
 
+bool
+get_use_roctx();
+
 bool&
 get_use_sampling() OMNITRACE_HOT;
 
diff --git a/source/lib/omnitrace/library/perfetto.hpp b/source/lib/omnitrace/library/perfetto.hpp
index 62de2252be..30daf2391a 100644
--- a/source/lib/omnitrace/library/perfetto.hpp
+++ b/source/lib/omnitrace/library/perfetto.hpp
@@ -38,6 +38,7 @@
             .SetDescription("Device-side functions submitted via HIP API"), \
         perfetto::Category("rocm_hip").SetDescription("Host-side HIP functions"), \
         perfetto::Category("rocm_hsa").SetDescription("Host-side HSA functions"), \
+        perfetto::Category("rocm_roctx").SetDescription("Host-side ROCTX labels"), \
         perfetto::Category("device_busy") \
             .SetDescription("Busy percentage of a GPU device"), \
         perfetto::Category("device_temp") \
diff --git a/source/lib/omnitrace/library/roctracer.cpp b/source/lib/omnitrace/library/roctracer.cpp
index 1a02ddeb2f..9c55ec63e0 100644
--- a/source/lib/omnitrace/library/roctracer.cpp
+++ b/source/lib/omnitrace/library/roctracer.cpp
@@ -22,6 +22,7 @@
 
 #include "library/roctracer.hpp"
 #include "library.hpp"
+#include "library/components/fwd.hpp"
 #include "library/config.hpp"
 #include "library/critical_trace.hpp"
 #include "library/debug.hpp"
@@ -44,6 +45,7 @@
 #include
 #include
 #include
+#include
 
 #define AMD_INTERNAL_BUILD 1
 #include
@@ -432,6 +434,91 @@ namespace
 thread_local std::unordered_map gpu_cids = {};
 }
 
+// ROCTX callback: maps roctxRangePushA/Pop and roctxRangeStartA/Stop onto push/pop
+// calls in the perfetto and timemory backends
+void
+roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
+                   void* /*arg*/)
+{
+    // NOTE(review): trait::runtime_enabled appears to have lost its template argument
+    // in this copy of the patch -- confirm against the upstream commit
+    if(get_state() != State::Active || !trait::runtime_enabled::get())
+        return;
+
+    OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
+
+    if(domain != ACTIVITY_DOMAIN_ROCTX) return;
+
+    // the map owns a copy of every label: the pointer handed to roctxRangeStartA
+    // belongs to the caller and may no longer be valid when roctxRangeStop arrives
+    static auto _range_map  = std::unordered_map<roctx_range_id_t, std::string>{};
+    static auto _range_lock = std::mutex{};
+    const auto* _data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
+
+    switch(cid)
+    {
+        case ROCTX_API_ID_roctxRangePushA:
+        {
+            if(get_use_perfetto())
+                tracing::push_perfetto(category::rocm_roctx{}, _data->args.message);
+            if(get_use_timemory()) tracing::push_timemory(_data->args.message);
+            break;
+        }
+        case ROCTX_API_ID_roctxRangePop:
+        {
+            // pop in the reverse order of the push
+            if(get_use_timemory()) tracing::pop_timemory(_data->args.message);
+            if(get_use_perfetto())
+                tracing::pop_perfetto(category::rocm_roctx{}, _data->args.message);
+            break;
+        }
+        case ROCTX_API_ID_roctxRangeStartA:
+        {
+            {
+                std::lock_guard<std::mutex> _lk{ _range_lock };
+                _range_map.emplace(roctx_range_id_t{ _data->args.id },
+                                   std::string{ _data->args.message });
+            }
+
+            if(get_use_perfetto())
+                tracing::push_perfetto(category::rocm_roctx{}, _data->args.message);
+            if(get_use_timemory()) tracing::push_timemory(_data->args.message);
+            break;
+        }
+        case ROCTX_API_ID_roctxRangeStop:
+        {
+            std::string _message = {};
+            {
+                std::lock_guard<std::mutex> _lk{ _range_lock };
+                auto itr = _range_map.find(roctx_range_id_t{ _data->args.id });
+                OMNITRACE_CI_THROW(itr == _range_map.end(),
+                                   "Error! could not find range with id %lu\n",
+                                   _data->args.id);
+                if(itr == _range_map.end())
+                {
+                    OMNITRACE_VERBOSE(0, "Warning! could not find range with id %lu\n",
+                                      _data->args.id);
+                    return;
+                }
+                // take ownership of the label and drop the finished range from the map
+                _message = std::move(itr->second);
+                _range_map.erase(itr);
+            }
+
+            if(!_message.empty())
+            {
+                if(get_use_timemory()) tracing::pop_timemory(_message.c_str());
+                if(get_use_perfetto())
+                    tracing::pop_perfetto(category::rocm_roctx{}, _message.c_str());
+            }
+            break;
+        }
+        case ROCTX_API_ID_roctxMarkA:
+            // we do nothing with marker events...for now
+        default: break;
+    }
+}
+
 // HIP API callback function
 void
 hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg)
diff --git a/source/lib/omnitrace/library/roctracer.hpp b/source/lib/omnitrace/library/roctracer.hpp
index 044f0fb40c..951bc5a33e 100644
--- a/source/lib/omnitrace/library/roctracer.hpp
+++ b/source/lib/omnitrace/library/roctracer.hpp
@@ -67,6 +67,9 @@ hip_exec_activity_callbacks(int64_t _tid);
 void
 hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg);
 
+void
+roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg);
+
 // Activity tracing callback
 void
 hip_activity_callback(const char* begin, const char* end, void*);