Support ACTIVITY_DOMAIN_ROCTX (#87)
- New configuration variable: OMNITRACE_USE_ROCTX
- Enable support for roctxRangePushA, roctxRangePop, roctxRangeStartA, roctxRangeStop
[ROCm/rocprofiler-systems commit: d22725e830]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
39adaaef98
Коммит
41b86132bb
@@ -69,6 +69,7 @@ TIMEMORY_DEFINE_NS_API(category, device_hsa)
|
||||
TIMEMORY_DEFINE_NS_API(category, rocm_hip)
|
||||
TIMEMORY_DEFINE_NS_API(category, rocm_hsa)
|
||||
TIMEMORY_DEFINE_NS_API(category, rocm_smi)
|
||||
TIMEMORY_DEFINE_NS_API(category, rocm_roctx)
|
||||
TIMEMORY_DEFINE_NS_API(category, kokkos)
|
||||
TIMEMORY_DEFINE_NS_API(category, mpi)
|
||||
TIMEMORY_DEFINE_NS_API(category, ompt)
|
||||
@@ -84,6 +85,7 @@ TIMEMORY_DEFINE_NAME_TRAIT("user", category::user);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("rocm_hip", category::rocm_hip);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("rocm_hsa", category::rocm_hsa);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("rocm_smi", category::rocm_smi);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("rocm_roctx", category::rocm_roctx);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("sampling", category::sampling);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("thread_sampling", category::thread_sampling);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("kokkos", category::kokkos);
|
||||
|
||||
+5
-2
@@ -150,8 +150,11 @@ roctracer::setup()
|
||||
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API,
|
||||
hip_api_callback, nullptr));
|
||||
// ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX,
|
||||
// hip_api_callback, nullptr));
|
||||
if(get_use_roctx())
|
||||
{
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX,
|
||||
roctx_api_callback, nullptr));
|
||||
}
|
||||
// Enable HIP activity tracing
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS));
|
||||
|
||||
|
||||
@@ -228,6 +228,11 @@ configure_settings(bool _init)
|
||||
"Enable sampling GPU power, temp, utilization, and memory usage", true, "backend",
|
||||
"rocm_smi", "rocm");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(
|
||||
bool, "OMNITRACE_USE_ROCTX",
|
||||
"Enable ROCtx API. Warning! Out-of-order ranges may corrupt perfetto flamegraph",
|
||||
false, "backend", "roctracer", "rocm", "roctx");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_USE_SAMPLING",
|
||||
"Enable statistical sampling of call-stack", false,
|
||||
"backend", "sampling");
|
||||
@@ -1198,6 +1203,17 @@ get_use_rocm_smi()
|
||||
#endif
|
||||
}
|
||||
|
||||
// Reports the current value of the "OMNITRACE_USE_ROCTX" configuration setting.
//
// When the library is built without roctracer support (OMNITRACE_USE_ROCTRACER
// undefined or not greater than zero) the setting is never consulted and the
// function unconditionally returns false.
bool
get_use_roctx()
{
#if !defined(OMNITRACE_USE_ROCTRACER) || OMNITRACE_USE_ROCTRACER <= 0
    return false;
#else
    // the lookup is performed once; later calls reuse the cached iterator
    static auto _roctx_itr = get_config()->find("OMNITRACE_USE_ROCTX");
    auto&       _setting   = static_cast<tim::tsettings<bool>&>(*_roctx_itr->second);
    return _setting.get();
#endif
}
|
||||
|
||||
bool&
|
||||
get_use_sampling()
|
||||
{
|
||||
|
||||
@@ -174,6 +174,9 @@ get_use_rocprofiler() OMNITRACE_HOT;
|
||||
bool
|
||||
get_use_rocm_smi() OMNITRACE_HOT;
|
||||
|
||||
bool
|
||||
get_use_roctx();
|
||||
|
||||
bool&
|
||||
get_use_sampling() OMNITRACE_HOT;
|
||||
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
.SetDescription("Device-side functions submitted via HIP API"), \
|
||||
perfetto::Category("rocm_hip").SetDescription("Host-side HIP functions"), \
|
||||
perfetto::Category("rocm_hsa").SetDescription("Host-side HSA functions"), \
|
||||
perfetto::Category("rocm_roctx").SetDescription("Host-side ROCTX labels"), \
|
||||
perfetto::Category("device_busy") \
|
||||
.SetDescription("Busy percentage of a GPU device"), \
|
||||
perfetto::Category("device_temp") \
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
#include "library/roctracer.hpp"
|
||||
#include "library.hpp"
|
||||
#include "library/components/fwd.hpp"
|
||||
#include "library/config.hpp"
|
||||
#include "library/critical_trace.hpp"
|
||||
#include "library/debug.hpp"
|
||||
@@ -44,6 +45,7 @@
|
||||
#include <roctracer_ext.h>
|
||||
#include <roctracer_hcc.h>
|
||||
#include <roctracer_hip.h>
|
||||
#include <roctracer_roctx.h>
|
||||
|
||||
#define AMD_INTERNAL_BUILD 1
|
||||
#include <roctracer_hsa.h>
|
||||
@@ -432,6 +434,91 @@ namespace
|
||||
thread_local std::unordered_map<size_t, size_t> gpu_cids = {};
|
||||
}
|
||||
|
||||
void
|
||||
roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
|
||||
void* /*arg*/)
|
||||
{
|
||||
if(get_state() != State::Active || !trait::runtime_enabled<comp::roctracer>::get())
|
||||
return;
|
||||
|
||||
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
|
||||
if(domain != ACTIVITY_DOMAIN_ROCTX) return;
|
||||
|
||||
static auto _range_map = std::unordered_map<roctx_range_id_t, std::string_view>{};
|
||||
static auto _range_lock = std::mutex{};
|
||||
const auto* _data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
|
||||
|
||||
switch(cid)
|
||||
{
|
||||
case ROCTX_API_ID_roctxRangePushA:
|
||||
{
|
||||
if(get_use_perfetto())
|
||||
tracing::push_perfetto(category::rocm_roctx{}, _data->args.message);
|
||||
|
||||
if(get_use_timemory()) tracing::push_timemory(_data->args.message);
|
||||
|
||||
break;
|
||||
}
|
||||
case ROCTX_API_ID_roctxRangePop:
|
||||
{
|
||||
if(get_use_timemory()) tracing::pop_timemory(_data->args.message);
|
||||
if(get_use_perfetto())
|
||||
tracing::pop_perfetto(category::rocm_roctx{}, _data->args.message);
|
||||
break;
|
||||
}
|
||||
case ROCTX_API_ID_roctxRangeStartA:
|
||||
{
|
||||
{
|
||||
std::unique_lock<std::mutex> _lk{ _range_lock, std::defer_lock };
|
||||
if(!_lk.owns_lock()) _lk.lock();
|
||||
_range_map.emplace(roctx_range_id_t{ _data->args.id },
|
||||
std::string_view{ _data->args.message });
|
||||
}
|
||||
|
||||
if(get_use_perfetto())
|
||||
tracing::push_perfetto(category::rocm_roctx{}, _data->args.message);
|
||||
|
||||
if(get_use_timemory()) tracing::push_timemory(_data->args.message);
|
||||
break;
|
||||
}
|
||||
case ROCTX_API_ID_roctxRangeStop:
|
||||
{
|
||||
std::string_view _message = {};
|
||||
{
|
||||
std::unique_lock<std::mutex> _lk{ _range_lock, std::defer_lock };
|
||||
if(!_lk.owns_lock()) _lk.lock();
|
||||
auto itr = _range_map.find(roctx_range_id_t{ _data->args.id });
|
||||
OMNITRACE_CI_THROW(itr == _range_map.end(),
|
||||
"Error! could not find range with id %lu\n",
|
||||
_data->args.id);
|
||||
if(itr == _range_map.end())
|
||||
{
|
||||
OMNITRACE_VERBOSE(0, "Warning! could not find range with id %lu\n",
|
||||
_data->args.id);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
_message = itr->second;
|
||||
}
|
||||
}
|
||||
|
||||
if(!_message.empty())
|
||||
{
|
||||
if(get_use_timemory()) tracing::pop_timemory(_message.data());
|
||||
if(get_use_perfetto())
|
||||
tracing::pop_perfetto(category::rocm_roctx{}, _message.data());
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case ROCTX_API_ID_roctxMarkA:
|
||||
// we do nothing with marker events...for now
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
// HIP API callback function
|
||||
void
|
||||
hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg)
|
||||
|
||||
@@ -67,6 +67,9 @@ hip_exec_activity_callbacks(int64_t _tid);
|
||||
void
|
||||
hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg);
|
||||
|
||||
void
|
||||
roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg);
|
||||
|
||||
// Activity tracing callback
|
||||
void
|
||||
hip_activity_callback(const char* begin, const char* end, void*);
|
||||
|
||||
Ссылка в новой задаче
Block a user