Support ACTIVITY_DOMAIN_ROCTX (#87)

- New configuration variable: OMNITRACE_USE_ROCTX
- Enable support for roctxRangePushA, roctxRangePop, roctxRangeStartA, roctxRangeStop
Этот коммит содержится в:
Jonathan R. Madsen
2022-07-18 02:06:40 -05:00
коммит произвёл GitHub
родитель a2dcc7381b
Коммит d22725e830
7 изменённых файлов: 117 добавлений и 2 удалений
+2
Просмотреть файл
@@ -69,6 +69,7 @@ TIMEMORY_DEFINE_NS_API(category, device_hsa)
TIMEMORY_DEFINE_NS_API(category, rocm_hip)
TIMEMORY_DEFINE_NS_API(category, rocm_hsa)
TIMEMORY_DEFINE_NS_API(category, rocm_smi)
// tag used as category::rocm_roctx for ROCTX range labels
TIMEMORY_DEFINE_NS_API(category, rocm_roctx)
TIMEMORY_DEFINE_NS_API(category, kokkos)
TIMEMORY_DEFINE_NS_API(category, mpi)
TIMEMORY_DEFINE_NS_API(category, ompt)
@@ -84,6 +85,7 @@ TIMEMORY_DEFINE_NAME_TRAIT("user", category::user);
TIMEMORY_DEFINE_NAME_TRAIT("rocm_hip", category::rocm_hip);
TIMEMORY_DEFINE_NAME_TRAIT("rocm_hsa", category::rocm_hsa);
TIMEMORY_DEFINE_NAME_TRAIT("rocm_smi", category::rocm_smi);
// name string associated with the category::rocm_roctx tag
TIMEMORY_DEFINE_NAME_TRAIT("rocm_roctx", category::rocm_roctx);
TIMEMORY_DEFINE_NAME_TRAIT("sampling", category::sampling);
TIMEMORY_DEFINE_NAME_TRAIT("thread_sampling", category::thread_sampling);
TIMEMORY_DEFINE_NAME_TRAIT("kokkos", category::kokkos);
+5 -2
Просмотреть файл
@@ -150,8 +150,11 @@ roctracer::setup()
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API,
hip_api_callback, nullptr));
// ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX,
// hip_api_callback, nullptr));
if(get_use_roctx())
{
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX,
roctx_api_callback, nullptr));
}
// Enable HIP activity tracing
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS));
+16
Просмотреть файл
@@ -228,6 +228,11 @@ configure_settings(bool _init)
"Enable sampling GPU power, temp, utilization, and memory usage", true, "backend",
"rocm_smi", "rocm");
OMNITRACE_CONFIG_SETTING(
bool, "OMNITRACE_USE_ROCTX",
"Enable ROCtx API. Warning! Out-of-order ranges may corrupt perfetto flamegraph",
false, "backend", "roctracer", "rocm", "roctx");
OMNITRACE_CONFIG_SETTING(bool, "OMNITRACE_USE_SAMPLING",
"Enable statistical sampling of call-stack", false,
"backend", "sampling");
@@ -1198,6 +1203,17 @@ get_use_rocm_smi()
#endif
}
// Reports whether ROCtx range tracing was requested via OMNITRACE_USE_ROCTX.
//
// When the build has no roctracer support (OMNITRACE_USE_ROCTRACER is undefined
// or not greater than zero) this always yields false. Otherwise the setting is
// looked up in the configuration on the first call only (the iterator is kept
// in a function-local static) and its current boolean value is returned.
bool
get_use_roctx()
{
#if !(defined(OMNITRACE_USE_ROCTRACER) && OMNITRACE_USE_ROCTRACER > 0)
    return false;
#else
    static auto _setting_itr = get_config()->find("OMNITRACE_USE_ROCTX");
    auto& _setting = static_cast<tim::tsettings<bool>&>(*_setting_itr->second);
    return _setting.get();
#endif
}
bool&
get_use_sampling()
{
+3
Просмотреть файл
@@ -174,6 +174,9 @@ get_use_rocprofiler() OMNITRACE_HOT;
bool
get_use_rocm_smi() OMNITRACE_HOT;
// Value of the OMNITRACE_USE_ROCTX setting; always false when built without
// roctracer support
bool
get_use_roctx();
bool&
get_use_sampling() OMNITRACE_HOT;
+1
Просмотреть файл
@@ -38,6 +38,7 @@
.SetDescription("Device-side functions submitted via HIP API"), \
perfetto::Category("rocm_hip").SetDescription("Host-side HIP functions"), \
perfetto::Category("rocm_hsa").SetDescription("Host-side HSA functions"), \
perfetto::Category("rocm_roctx").SetDescription("Host-side ROCTX labels"), \
perfetto::Category("device_busy") \
.SetDescription("Busy percentage of a GPU device"), \
perfetto::Category("device_temp") \
+87
Просмотреть файл
@@ -22,6 +22,7 @@
#include "library/roctracer.hpp"
#include "library.hpp"
#include "library/components/fwd.hpp"
#include "library/config.hpp"
#include "library/critical_trace.hpp"
#include "library/debug.hpp"
@@ -44,6 +45,7 @@
#include <roctracer_ext.h>
#include <roctracer_hcc.h>
#include <roctracer_hip.h>
#include <roctracer_roctx.h>
#define AMD_INTERNAL_BUILD 1
#include <roctracer_hsa.h>

#include <string>
@@ -432,6 +434,91 @@ namespace
thread_local std::unordered_map<size_t, size_t> gpu_cids = {};
}
void
roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
void* /*arg*/)
{
if(get_state() != State::Active || !trait::runtime_enabled<comp::roctracer>::get())
return;
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
if(domain != ACTIVITY_DOMAIN_ROCTX) return;
static auto _range_map = std::unordered_map<roctx_range_id_t, std::string_view>{};
static auto _range_lock = std::mutex{};
const auto* _data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
switch(cid)
{
case ROCTX_API_ID_roctxRangePushA:
{
if(get_use_perfetto())
tracing::push_perfetto(category::rocm_roctx{}, _data->args.message);
if(get_use_timemory()) tracing::push_timemory(_data->args.message);
break;
}
case ROCTX_API_ID_roctxRangePop:
{
if(get_use_timemory()) tracing::pop_timemory(_data->args.message);
if(get_use_perfetto())
tracing::pop_perfetto(category::rocm_roctx{}, _data->args.message);
break;
}
case ROCTX_API_ID_roctxRangeStartA:
{
{
std::unique_lock<std::mutex> _lk{ _range_lock, std::defer_lock };
if(!_lk.owns_lock()) _lk.lock();
_range_map.emplace(roctx_range_id_t{ _data->args.id },
std::string_view{ _data->args.message });
}
if(get_use_perfetto())
tracing::push_perfetto(category::rocm_roctx{}, _data->args.message);
if(get_use_timemory()) tracing::push_timemory(_data->args.message);
break;
}
case ROCTX_API_ID_roctxRangeStop:
{
std::string_view _message = {};
{
std::unique_lock<std::mutex> _lk{ _range_lock, std::defer_lock };
if(!_lk.owns_lock()) _lk.lock();
auto itr = _range_map.find(roctx_range_id_t{ _data->args.id });
OMNITRACE_CI_THROW(itr == _range_map.end(),
"Error! could not find range with id %lu\n",
_data->args.id);
if(itr == _range_map.end())
{
OMNITRACE_VERBOSE(0, "Warning! could not find range with id %lu\n",
_data->args.id);
return;
}
else
{
_message = itr->second;
}
}
if(!_message.empty())
{
if(get_use_timemory()) tracing::pop_timemory(_message.data());
if(get_use_perfetto())
tracing::pop_perfetto(category::rocm_roctx{}, _message.data());
}
break;
}
case ROCTX_API_ID_roctxMarkA:
// we do nothing with marker events...for now
default: break;
}
}
// HIP API callback function
void
hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg)
+3
Просмотреть файл
@@ -67,6 +67,9 @@ hip_exec_activity_callbacks(int64_t _tid);
void
hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg);
// ROCTX API callback function
void
roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg);
// Activity tracing callback
void
hip_activity_callback(const char* begin, const char* end, void*);