Roctracer perfetto flow fixes (#267)
* testing label updates
- automatically add "gpu", "roctracer", "rocm-smi", and "rocprofiler" test labels when appropriate
* Bump version to v1.9.1
* roctracer and config updates
- fix perfetto::Flow
- use roctracer correlation ID instead of critical trace correlation ID
- renamed ambiguous _cid, _parent_cid, _corr_id variables to _crit_cid, _parent_crit_cid, _roct_cid
- use atomic_{mutex,lock} instead of STL mutex/lock
- support for individual perfetto annotations for HIP API args
- OMNITRACE_PERFETTO_COMPACT_ROCTRACER_ANNOTATIONS option for controlling compact vs. individual perfetto annotations for HIP API args
* Update timemory submodule
- argparser updates
- help prints to std::cout by default now
- supports setting custom ostream
* cmake formatting
* config::get_setting_value updates
- config::get_setting_value returns std::optional instead of std::pair<bool, Tp>
This commit is contained in:
committed by
GitHub
parent
9eafb23602
commit
279a8e0952
Vendored
+1
-1
Submodule external/timemory updated: 50c13fef89...1ab76c36ef
@@ -406,11 +406,7 @@ configure_settings(bool _init)
|
||||
"durations are needed, see OMNITRACE_TRACE_PERIODS.",
|
||||
0.0, "trace", "profile", "perfetto", "timemory");
|
||||
|
||||
auto _clock_s =
|
||||
config::get_setting_value<std::string>("OMNITRACE_TRACE_PERIOD_CLOCK_ID").second;
|
||||
|
||||
auto _clock_choices = std::vector<std::string>{};
|
||||
|
||||
for(const auto& itr : constraint::get_valid_clock_ids())
|
||||
{
|
||||
_clock_choices.emplace_back(
|
||||
@@ -696,6 +692,18 @@ configure_settings(bool _init)
|
||||
"feature may dramatically reduce the size of the trace",
|
||||
true, "perfetto", "data", "debugging", "advanced");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(
|
||||
bool, "OMNITRACE_PERFETTO_COMPACT_ROCTRACER_ANNOTATIONS",
|
||||
"When PERFETTO_ANNOTATIONS, USE_ROCTRACER, and ROCTRACER_HIP_API are all "
|
||||
"enabled, enabling this option will result in the arg information for HIP API "
|
||||
"calls to all be within one annotation (e.g., args=\"stream=0x0, dst=0x1F, "
|
||||
"sizeBytes=64, src=0x08, kind=1\"). When disabled, each parameter will be an "
|
||||
"individual annotation (e.g. stream, dst, sizeBytes, etc.). The benefit of the "
|
||||
"former is that it is faster to serialize and consumes less file space; the "
|
||||
"benefit of the latter is that it becomes much easier to find slices in the "
|
||||
"trace with the same value",
|
||||
false, "perfetto", "data", "debugging", "roctracer", "rocm", "advanced");
|
||||
|
||||
OMNITRACE_CONFIG_SETTING(
|
||||
uint64_t, "OMNITRACE_THREAD_POOL_SIZE",
|
||||
"Max number of threads for processing background tasks",
|
||||
@@ -1095,7 +1103,7 @@ configure_mode_settings(const std::shared_ptr<settings>& _config)
|
||||
}
|
||||
else
|
||||
{
|
||||
bool _changed = get_setting_value<bool>(_name).second != _v;
|
||||
bool _changed = get_setting_value<bool>(_name).value_or(!_v) != _v;
|
||||
OMNITRACE_BASIC_VERBOSE(
|
||||
1 && _changed,
|
||||
"[configure_mode_settings] Overriding %s to %s in %s mode...\n",
|
||||
@@ -1105,7 +1113,7 @@ configure_mode_settings(const std::shared_ptr<settings>& _config)
|
||||
};
|
||||
|
||||
auto _use_causal = get_setting_value<bool>("OMNITRACE_USE_CAUSAL");
|
||||
if(_use_causal.first && _use_causal.second) set_env("OMNITRACE_MODE", "causal", 1);
|
||||
if(_use_causal && *_use_causal) set_env("OMNITRACE_MODE", "causal", 1);
|
||||
|
||||
if(get_mode() == Mode::Coverage)
|
||||
{
|
||||
|
||||
@@ -126,15 +126,15 @@ set_default_setting_value(const std::string& _name, Tp&& _v)
|
||||
}
|
||||
|
||||
// Look up the setting called `_name` in the shared tim::settings instance and
// return its value converted to `Tp`.
//
// NOTE: this listing is a diff rendering, so the pre-change lines (returning
// std::pair<bool, Tp>) and the post-change lines (returning std::optional<Tp>)
// appear one after another. The post-change behavior is:
//   - empty optional when there is no shared settings instance,
//   - empty optional when `_name` is not found or its entry is null,
//   - empty optional when get<Tp>() reports failure via `.first == false`,
//   - otherwise an optional holding the `.second` of get<Tp>().
template <typename Tp>
|
||||
std::pair<bool, Tp>
|
||||
std::optional<Tp>
|
||||
get_setting_value(const std::string& _name)
|
||||
{
|
||||
auto _instance = tim::settings::shared_instance();
|
||||
// pre-change: "not available" was signalled with a (false, Tp{}) pair
if(!_instance) return std::make_pair(false, Tp{});
|
||||
// post-change: "not available" is signalled with an empty optional
if(!_instance) return std::optional<Tp>{};
|
||||
auto _setting = _instance->find(_name);
|
||||
// pre-change: missing/null entry -> (false, Tp{}), else forward the pair from get<Tp>()
if(_setting == _instance->end() || !_setting->second)
|
||||
return std::make_pair(false, Tp{});
|
||||
return _setting->second->get<Tp>();
|
||||
// post-change: missing/null entry -> empty optional
if(_setting == _instance->end() || !_setting->second) return std::optional<Tp>{};
|
||||
auto&& _ret = _setting->second->get<Tp>();
|
||||
// translate the (success, value) pair into an optional
return (_ret.first) ? std::optional<Tp>{ _ret.second } : std::optional<Tp>{};
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
@@ -232,9 +232,10 @@ spec::spec(const std::string& _clock_id, double _delay, double _dur, uint64_t _n
|
||||
{}
|
||||
|
||||
spec::spec(const std::string& _line)
|
||||
: spec{ config::get_setting_value<std::string>("OMNITRACE_TRACE_PERIOD_CLOCK_ID").second,
|
||||
config::get_setting_value<double>("OMNITRACE_TRACE_DELAY").second,
|
||||
config::get_setting_value<double>("OMNITRACE_TRACE_DURATION").second }
|
||||
: spec{ config::get_setting_value<std::string>("OMNITRACE_TRACE_PERIOD_CLOCK_ID")
|
||||
.value_or("CLOCK_REALTIME"),
|
||||
config::get_setting_value<double>("OMNITRACE_TRACE_DELAY").value_or(0.0),
|
||||
config::get_setting_value<double>("OMNITRACE_TRACE_DURATION").value_or(0.0) }
|
||||
{
|
||||
auto _delim = tim::delimit(_line, ":");
|
||||
if(!_delim.empty()) delay = utility::convert<double>(_delim.at(0));
|
||||
@@ -300,12 +301,13 @@ get_trace_specs()
|
||||
auto _v = std::vector<constraint::spec>{};
|
||||
|
||||
{
|
||||
auto _delay_v = config::get_setting_value<double>("OMNITRACE_TRACE_DELAY").second;
|
||||
auto _delay_v =
|
||||
config::get_setting_value<double>("OMNITRACE_TRACE_DELAY").value_or(0.0);
|
||||
auto _duration_v =
|
||||
config::get_setting_value<double>("OMNITRACE_TRACE_DURATION").second;
|
||||
config::get_setting_value<double>("OMNITRACE_TRACE_DURATION").value_or(0.0);
|
||||
auto _clock_v = find_clock_identifier(
|
||||
config::get_setting_value<std::string>("OMNITRACE_TRACE_PERIOD_CLOCK_ID")
|
||||
.second);
|
||||
.value_or("CLOCK_REALTIME"));
|
||||
|
||||
if(_delay_v > 0.0 || _duration_v > 0.0)
|
||||
{
|
||||
@@ -315,7 +317,8 @@ get_trace_specs()
|
||||
|
||||
{
|
||||
auto _periods_v =
|
||||
config::get_setting_value<std::string>("OMNITRACE_TRACE_PERIODS").second;
|
||||
config::get_setting_value<std::string>("OMNITRACE_TRACE_PERIODS")
|
||||
.value_or("");
|
||||
if(!_periods_v.empty())
|
||||
{
|
||||
for(auto itr : tim::delimit(_periods_v, " ;\t\n"))
|
||||
|
||||
@@ -96,8 +96,8 @@ auto&
|
||||
get_engine()
|
||||
{
|
||||
static auto _seed = []() -> hash_value_t {
|
||||
auto _seed_v =
|
||||
config::get_setting_value<uint64_t>("OMNITRACE_CAUSAL_RANDOM_SEED").second;
|
||||
auto _seed_v = config::get_setting_value<uint64_t>("OMNITRACE_CAUSAL_RANDOM_SEED")
|
||||
.value_or(0);
|
||||
if(_seed_v == 0) _seed_v = std::random_device{}();
|
||||
return _seed_v;
|
||||
}();
|
||||
@@ -138,7 +138,7 @@ get_filters(std::set<binary::scope_filter::filter_scope> _scopes = {
|
||||
|
||||
bool _use_default_excludes =
|
||||
config::get_setting_value<bool>("OMNITRACE_CAUSAL_FUNCTION_EXCLUDE_DEFAULTS")
|
||||
.second;
|
||||
.value_or(true);
|
||||
|
||||
if(_use_default_excludes && _scopes.count(sf::FUNCTION_FILTER) > 0)
|
||||
{
|
||||
@@ -471,9 +471,9 @@ perform_experiment_impl(std::shared_ptr<std::promise<void>> _started) // NOLINT
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds{ 10 });
|
||||
|
||||
double _delay_sec =
|
||||
config::get_setting_value<double>("OMNITRACE_CAUSAL_DELAY").second;
|
||||
config::get_setting_value<double>("OMNITRACE_CAUSAL_DELAY").value_or(0.0);
|
||||
double _duration_sec =
|
||||
config::get_setting_value<double>("OMNITRACE_CAUSAL_DURATION").second;
|
||||
config::get_setting_value<double>("OMNITRACE_CAUSAL_DURATION").value_or(0.0);
|
||||
auto _duration_nsec = duration_nsec_t{ _duration_sec * units::sec };
|
||||
|
||||
if(_delay_sec > 0.0)
|
||||
|
||||
@@ -493,7 +493,7 @@ experiment::save_experiments(std::string _fname_base, const filename_config_t& _
|
||||
}
|
||||
|
||||
bool _causal_output_reset =
|
||||
config::get_setting_value<bool>("OMNITRACE_CAUSAL_FILE_RESET").second;
|
||||
config::get_setting_value<bool>("OMNITRACE_CAUSAL_FILE_RESET").value_or(false);
|
||||
|
||||
// if(current_record.experiments.empty()) return;
|
||||
|
||||
|
||||
@@ -222,9 +222,8 @@ post_process()
|
||||
|
||||
auto _get_setting = [](const std::string& _v) {
|
||||
auto&& _b = config::get_setting_value<bool>(_v);
|
||||
OMNITRACE_CI_THROW(!_b.first, "Error! No configuration setting named '%s'",
|
||||
_v.c_str());
|
||||
return (_b.first) ? _b.second : true;
|
||||
OMNITRACE_CI_THROW(!_b, "Error! No configuration setting named '%s'", _v.c_str());
|
||||
return _b.value_or(true);
|
||||
};
|
||||
|
||||
auto _text_output = _get_setting("OMNITRACE_TEXT_OUTPUT");
|
||||
|
||||
@@ -269,10 +269,10 @@ extern "C"
|
||||
|
||||
_name_len_limit = omnitrace::config::get_setting_value<int64_t>(
|
||||
"OMNITRACE_KOKKOSP_NAME_LENGTH_MAX")
|
||||
.second;
|
||||
.value_or(_name_len_limit);
|
||||
_kp_prefix =
|
||||
omnitrace::config::get_setting_value<std::string>("OMNITRACE_KOKKOSP_PREFIX")
|
||||
.second;
|
||||
.value_or(_kp_prefix);
|
||||
}
|
||||
|
||||
void kokkosp_finalize_library()
|
||||
|
||||
@@ -22,8 +22,10 @@
|
||||
|
||||
#include "library/roctracer.hpp"
|
||||
#include "core/components/fwd.hpp"
|
||||
#include "core/concepts.hpp"
|
||||
#include "core/config.hpp"
|
||||
#include "core/debug.hpp"
|
||||
#include "core/locking.hpp"
|
||||
#include "library/components/category_region.hpp"
|
||||
#include "library/critical_trace.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
@@ -67,6 +69,14 @@ namespace omnitrace
|
||||
{
|
||||
namespace
|
||||
{
|
||||
template <typename Tp, typename CategoryT = category::roctracer>
|
||||
auto&
|
||||
roctracer_type_mutex(uint64_t _n = threading::get_id())
|
||||
{
|
||||
return tim::type_mutex<Tp, CategoryT, max_supported_threads, locking::atomic_mutex>(
|
||||
_n % max_supported_threads);
|
||||
}
|
||||
|
||||
std::string
|
||||
hip_api_string(hip_api_id_t id, const hip_api_data_t* data)
|
||||
{
|
||||
@@ -163,8 +173,7 @@ using key_data_mutex_t = std::decay_t<decltype(get_roctracer_key_data())>;
|
||||
// Return the mutex guarding the HIP activity callbacks of thread `_tid`
// (defaults to the calling thread's id via threading::get_id()).
//
// NOTE: this listing is a diff rendering; the first return statement is the
// pre-change line and the second is its post-change replacement.
auto&
|
||||
get_hip_activity_mutex(int64_t _tid = threading::get_id())
|
||||
{
|
||||
// pre-change: index tim::type_mutex directly with `_tid`
return tim::type_mutex<hip_activity_mutex_t, category::roctracer,
|
||||
max_supported_threads>(_tid);
|
||||
// post-change: delegate to roctracer_type_mutex, which takes its index as uint64_t
return roctracer_type_mutex<hip_activity_mutex_t, category::roctracer>(_tid);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
@@ -422,8 +431,8 @@ void
|
||||
hip_exec_activity_callbacks(int64_t _tid)
|
||||
{
|
||||
// OMNITRACE_ROCTRACER_CALL(roctracer_flush_activity());
|
||||
tim::auto_lock_t _lk{ get_hip_activity_mutex(_tid) };
|
||||
auto& _async_ops = get_hip_activity_callbacks(_tid);
|
||||
locking::atomic_lock _lk{ get_hip_activity_mutex(_tid) };
|
||||
auto& _async_ops = get_hip_activity_callbacks(_tid);
|
||||
if(!_async_ops) return;
|
||||
for(auto& itr : *_async_ops)
|
||||
{
|
||||
@@ -434,7 +443,7 @@ hip_exec_activity_callbacks(int64_t _tid)
|
||||
|
||||
// File-local, per-thread map from size_t to size_t.
// NOTE: this listing is a diff rendering; `gpu_cids` is the pre-change name and
// `gpu_crit_cids` is the post-change name of the same variable.
// NOTE(review): presumably holds critical-trace correlation ids for GPU
// activity -- the key/value meaning is not visible here, confirm at use sites.
namespace
|
||||
{
|
||||
thread_local std::unordered_map<size_t, size_t> gpu_cids = {};
|
||||
thread_local std::unordered_map<size_t, size_t> gpu_crit_cids = {};
|
||||
}
|
||||
|
||||
void
|
||||
@@ -449,7 +458,7 @@ roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
|
||||
if(domain != ACTIVITY_DOMAIN_ROCTX) return;
|
||||
|
||||
static auto _range_map = std::unordered_map<roctx_range_id_t, std::string>{};
|
||||
static auto _range_lock = std::mutex{};
|
||||
static auto _range_lock = locking::atomic_mutex{};
|
||||
const auto* _data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
|
||||
static thread_local auto _range_stack = std::vector<std::string>{};
|
||||
|
||||
@@ -482,7 +491,7 @@ roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
|
||||
case ROCTX_API_ID_roctxRangeStartA:
|
||||
{
|
||||
{
|
||||
std::unique_lock<std::mutex> _lk{ _range_lock, std::defer_lock };
|
||||
locking::atomic_lock _lk{ _range_lock, std::defer_lock };
|
||||
if(!_lk.owns_lock()) _lk.lock();
|
||||
_range_map.emplace(roctx_range_id_t{ _data->args.id },
|
||||
std::string{ _data->args.message });
|
||||
@@ -495,7 +504,7 @@ roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
|
||||
{
|
||||
std::string_view _message = {};
|
||||
{
|
||||
std::unique_lock<std::mutex> _lk{ _range_lock, std::defer_lock };
|
||||
locking::atomic_lock _lk{ _range_lock, std::defer_lock };
|
||||
if(!_lk.owns_lock()) _lk.lock();
|
||||
auto itr = _range_map.find(roctx_range_id_t{ _data->args.id });
|
||||
OMNITRACE_CI_THROW(itr == _range_map.end(),
|
||||
@@ -571,13 +580,13 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
|
||||
op_name, cid, data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
|
||||
|
||||
int64_t _ts = comp::wall_clock::record();
|
||||
auto _tid = threading::get_id();
|
||||
uint64_t _cid = 0;
|
||||
uint64_t _parent_cid = 0;
|
||||
uint32_t _depth = 0;
|
||||
uintptr_t _queue = 0;
|
||||
auto _corr_id = data->correlation_id;
|
||||
int64_t _ts = comp::wall_clock::record();
|
||||
auto _tid = threading::get_id();
|
||||
uint64_t _crit_cid = 0;
|
||||
uint64_t _parent_crit_cid = 0;
|
||||
uint32_t _depth = 0;
|
||||
uintptr_t _queue = 0;
|
||||
auto _roct_cid = data->correlation_id;
|
||||
|
||||
#define OMNITRACE_HIP_API_QUEUE_CASE(API_FUNC, VARIABLE) \
|
||||
case HIP_API_ID_##API_FUNC: \
|
||||
@@ -713,37 +722,67 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
|
||||
{
|
||||
if(get_use_perfetto() || get_use_timemory() || get_use_rocm_smi())
|
||||
{
|
||||
tim::auto_lock_t _lk{ tim::type_mutex<key_data_mutex_t>() };
|
||||
get_roctracer_key_data().emplace(_corr_id, _name);
|
||||
get_roctracer_tid_data().emplace(_corr_id, _tid);
|
||||
locking::atomic_lock _lk{ roctracer_type_mutex<key_data_mutex_t>() };
|
||||
get_roctracer_key_data().emplace(_roct_cid, _name);
|
||||
get_roctracer_tid_data().emplace(_roct_cid, _tid);
|
||||
}
|
||||
}
|
||||
|
||||
std::tie(_cid, _parent_cid, _depth) = create_cpu_cid_entry();
|
||||
std::tie(_crit_cid, _parent_crit_cid, _depth) = create_cpu_cid_entry();
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
static auto _compact_annotations =
|
||||
config::get_setting_value<bool>(
|
||||
"OMNITRACE_PERFETTO_COMPACT_ROCTRACER_ANNOTATIONS")
|
||||
.value_or(false);
|
||||
|
||||
auto _api_id = static_cast<hip_api_id_t>(cid);
|
||||
tracing::push_perfetto_ts(
|
||||
category::rocm_hip{}, op_name, _ts, ::perfetto::Flow::ProcessScoped(_cid),
|
||||
category::rocm_hip{}, op_name, _ts,
|
||||
::perfetto::Flow::ProcessScoped(_roct_cid),
|
||||
[&](::perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(ctx, "begin_ns", _ts);
|
||||
tracing::add_perfetto_annotation(ctx, "pcid", _parent_cid);
|
||||
tracing::add_perfetto_annotation(ctx, "cid", _crit_cid);
|
||||
tracing::add_perfetto_annotation(ctx, "pcid", _parent_crit_cid);
|
||||
tracing::add_perfetto_annotation(ctx, "device", _device_id);
|
||||
tracing::add_perfetto_annotation(ctx, "tid", _tid);
|
||||
tracing::add_perfetto_annotation(ctx, "depth", _depth);
|
||||
tracing::add_perfetto_annotation(ctx, "corr_id", _corr_id);
|
||||
tracing::add_perfetto_annotation(ctx, "args",
|
||||
hip_api_string(_api_id, data));
|
||||
tracing::add_perfetto_annotation(ctx, "corr_id", _roct_cid);
|
||||
if(_compact_annotations)
|
||||
{
|
||||
tracing::add_perfetto_annotation(
|
||||
ctx, "args", hip_api_string(_api_id, data));
|
||||
}
|
||||
else
|
||||
{
|
||||
auto _args = std::string{ hip_api_string(_api_id, data) };
|
||||
if(!_args.empty())
|
||||
{
|
||||
for(auto itr : tim::delimit(_args, ","))
|
||||
{
|
||||
if(itr.empty()) continue;
|
||||
auto _bpos = itr.find_first_not_of(' ');
|
||||
auto _epos = itr.find_last_not_of(' ');
|
||||
if(_epos > _bpos)
|
||||
itr = itr.substr(_bpos, (_epos - _bpos) + 1);
|
||||
auto _pos = itr.find('=');
|
||||
if(_pos != std::string::npos)
|
||||
tracing::add_perfetto_annotation(
|
||||
ctx, itr.substr(0, _pos),
|
||||
itr.substr(_pos + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
if(get_use_timemory())
|
||||
{
|
||||
auto itr = get_roctracer_hip_data()->emplace(
|
||||
_corr_id, roctracer_hip_bundle_t{ op_name });
|
||||
_roct_cid, roctracer_hip_bundle_t{ op_name });
|
||||
if(itr.second)
|
||||
{
|
||||
itr.first->second.start();
|
||||
@@ -757,12 +796,12 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
|
||||
if(get_use_critical_trace() || get_use_rocm_smi())
|
||||
{
|
||||
add_critical_trace<Device::CPU, Phase::BEGIN>(
|
||||
_tid, _cid, _corr_id, _parent_cid, _ts, 0, _device_id, _queue,
|
||||
_tid, _crit_cid, _roct_cid, _parent_crit_cid, _ts, 0, _device_id, _queue,
|
||||
critical_trace::add_hash_id(op_name), _depth);
|
||||
}
|
||||
|
||||
get_roctracer_cid_data(_tid).emplace(
|
||||
_corr_id, cid_data{ _cid, _parent_cid, _depth, _queue });
|
||||
_roct_cid, cid_data{ _crit_cid, _parent_crit_cid, _depth, _queue });
|
||||
|
||||
hip_exec_activity_callbacks(_tid);
|
||||
}
|
||||
@@ -770,8 +809,8 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
|
||||
{
|
||||
hip_exec_activity_callbacks(_tid);
|
||||
|
||||
std::tie(_cid, _parent_cid, _depth, std::ignore) =
|
||||
get_roctracer_cid_data(_tid).at(_corr_id);
|
||||
std::tie(_crit_cid, _parent_crit_cid, _depth, std::ignore) =
|
||||
get_roctracer_cid_data(_tid).at(_roct_cid);
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
@@ -785,9 +824,9 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
|
||||
}
|
||||
if(get_use_timemory())
|
||||
{
|
||||
auto _stop = [&_corr_id](int64_t _tid_v) {
|
||||
auto _stop = [&_roct_cid](int64_t _tid_v) {
|
||||
auto& _data = get_roctracer_hip_data(_tid_v);
|
||||
auto itr = _data->find(_corr_id);
|
||||
auto itr = _data->find(_roct_cid);
|
||||
if(itr != get_roctracer_hip_data()->end())
|
||||
{
|
||||
itr->second.stop();
|
||||
@@ -807,8 +846,8 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
|
||||
if(get_use_critical_trace() || get_use_rocm_smi())
|
||||
{
|
||||
add_critical_trace<Device::CPU, Phase::END>(
|
||||
_tid, _cid, _corr_id, _parent_cid, _ts, _ts, _device_id, _queue,
|
||||
critical_trace::add_hash_id(op_name), _depth);
|
||||
_tid, _crit_cid, _roct_cid, _parent_crit_cid, _ts, _ts, _device_id,
|
||||
_queue, critical_trace::add_hash_id(op_name), _depth);
|
||||
}
|
||||
}
|
||||
tim::consume_parameters(arg);
|
||||
@@ -861,33 +900,33 @@ hip_activity_callback(const char* begin, const char* end, void* arg)
|
||||
|
||||
const char* op_name =
|
||||
roctracer_op_string(record->domain, record->op, record->kind);
|
||||
auto _ns_skew = get_clock_skew();
|
||||
uint64_t _beg_ns = record->begin_ns + _ns_skew;
|
||||
uint64_t _end_ns = record->end_ns + _ns_skew;
|
||||
auto _corr_id = record->correlation_id;
|
||||
auto _ns_skew = get_clock_skew();
|
||||
uint64_t _beg_ns = record->begin_ns + _ns_skew;
|
||||
uint64_t _end_ns = record->end_ns + _ns_skew;
|
||||
auto _roct_cid = record->correlation_id;
|
||||
|
||||
auto& _keys = get_roctracer_key_data();
|
||||
auto& _tids = get_roctracer_tid_data();
|
||||
|
||||
int16_t _depth = 0; // depth of kernel launch
|
||||
int64_t _tid = 0; // thread id
|
||||
uint64_t _cid = 0; // correlation id
|
||||
uint64_t _pcid = 0; // parent corr_id
|
||||
int32_t _devid = record->device_id; // device id
|
||||
int64_t _queid = record->queue_id; // queue id
|
||||
uintptr_t _queue = 0; // Host queue (stream)
|
||||
auto _laps = _indexes[_corr_id]++; // see note #1
|
||||
int16_t _depth = 0; // depth of kernel launch
|
||||
int64_t _tid = 0; // thread id
|
||||
uint64_t _crit_cid = 0; // correlation id
|
||||
uint64_t _pcid = 0; // parent corr_id
|
||||
int32_t _devid = record->device_id; // device id
|
||||
int64_t _queid = record->queue_id; // queue id
|
||||
uintptr_t _queue = 0; // Host queue (stream)
|
||||
auto _laps = _indexes[_roct_cid]++; // see note #1
|
||||
const char* _name = nullptr;
|
||||
bool _found = false;
|
||||
bool _critical_trace = get_use_critical_trace() || get_use_rocm_smi();
|
||||
|
||||
{
|
||||
tim::auto_lock_t _lk{ tim::type_mutex<key_data_mutex_t>() };
|
||||
if(_tids.find(_corr_id) != _tids.end())
|
||||
locking::atomic_lock _lk{ roctracer_type_mutex<key_data_mutex_t>() };
|
||||
if(_tids.find(_roct_cid) != _tids.end())
|
||||
{
|
||||
_found = true;
|
||||
_tid = _tids.at(_corr_id);
|
||||
auto itr = _keys.find(_corr_id);
|
||||
_tid = _tids.at(_roct_cid);
|
||||
auto itr = _keys.find(_roct_cid);
|
||||
if(itr != _keys.end()) _name = itr->second;
|
||||
}
|
||||
}
|
||||
@@ -897,9 +936,9 @@ hip_activity_callback(const char* begin, const char* end, void* arg)
|
||||
|
||||
if(_critical_trace)
|
||||
{
|
||||
auto& _cids = get_roctracer_cid_data(_tid);
|
||||
if(_cids.find(_corr_id) != _cids.end())
|
||||
std::tie(_cid, _pcid, _depth, _queue) = _cids.at(_corr_id);
|
||||
auto& _crit_cids = get_roctracer_cid_data(_tid);
|
||||
if(_crit_cids.find(_roct_cid) != _crit_cids.end())
|
||||
std::tie(_crit_cid, _pcid, _depth, _queue) = _crit_cids.at(_roct_cid);
|
||||
else
|
||||
{
|
||||
OMNITRACE_VERBOSE_F(3,
|
||||
@@ -962,12 +1001,13 @@ hip_activity_callback(const char* begin, const char* end, void* arg)
|
||||
assert(_end_ns >= _beg_ns);
|
||||
tracing::push_perfetto_track(
|
||||
category::device_hip{}, _kernel_names.at(_name).c_str(), _track, _beg_ns,
|
||||
::perfetto::Flow::ProcessScoped(_cid), [&](::perfetto::EventContext ctx) {
|
||||
::perfetto::Flow::ProcessScoped(_roct_cid),
|
||||
[&](::perfetto::EventContext ctx) {
|
||||
if(config::get_perfetto_annotations())
|
||||
{
|
||||
tracing::add_perfetto_annotation(ctx, "begin_ns", _beg_ns);
|
||||
tracing::add_perfetto_annotation(ctx, "end_ns", _end_ns);
|
||||
tracing::add_perfetto_annotation(ctx, "corr_id", _corr_id);
|
||||
tracing::add_perfetto_annotation(ctx, "corr_id", _roct_cid);
|
||||
tracing::add_perfetto_annotation(ctx, "device", _devid);
|
||||
tracing::add_perfetto_annotation(ctx, "queue", _queid);
|
||||
tracing::add_perfetto_annotation(ctx, "tid", _tid);
|
||||
@@ -985,8 +1025,8 @@ hip_activity_callback(const char* begin, const char* end, void* arg)
|
||||
auto _hash = critical_trace::add_hash_id(_name);
|
||||
uint16_t _prio = _laps + 1; // priority
|
||||
add_critical_trace<Device::GPU, Phase::DELTA, false>(
|
||||
_tid, _cid, _corr_id, _cid, _beg_ns, _end_ns, _devid, _queid, _hash,
|
||||
_depth + 1, _prio);
|
||||
_tid, _crit_cid, _roct_cid, _crit_cid, _beg_ns, _end_ns, _devid, _queid,
|
||||
_hash, _depth + 1, _prio);
|
||||
}
|
||||
|
||||
if(_found && _name != nullptr && get_use_timemory())
|
||||
@@ -1004,8 +1044,8 @@ hip_activity_callback(const char* begin, const char* end, void* arg)
|
||||
_bundle.pop();
|
||||
};
|
||||
|
||||
auto& _async_ops = get_hip_activity_callbacks(_tid);
|
||||
tim::auto_lock_t _lk{ get_hip_activity_mutex(_tid) };
|
||||
auto& _async_ops = get_hip_activity_callbacks(_tid);
|
||||
locking::atomic_lock _lk{ get_hip_activity_mutex(_tid) };
|
||||
_async_ops->emplace_back(std::move(_func));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -351,6 +351,13 @@ function(OMNITRACE_ADD_TEST)
|
||||
"${_KWARGS}"
|
||||
${ARGN})
|
||||
|
||||
# For each test variant prefix, fall back to a default <PREFIX>_FAIL_REGEX when
# the variable is empty or unset. The default pattern matches either
# "### ERROR ###" or "address of faulting memory reference".
foreach(_PREFIX PRELOAD RUNTIME REWRITE REWRITE_RUN BASELINE)
|
||||
if("${${_PREFIX}_FAIL_REGEX}" STREQUAL "")
|
||||
set(${_PREFIX}_FAIL_REGEX
|
||||
"(### ERROR ###|address of faulting memory reference)")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
if(TEST_GPU AND NOT _VALID_GPU)
|
||||
omnitrace_message(STATUS
|
||||
"${TEST_NAME} requires a GPU and no valid GPUs were found")
|
||||
@@ -390,6 +397,33 @@ function(OMNITRACE_ADD_TEST)
|
||||
|
||||
list(APPEND TEST_ENVIRONMENT "OMNITRACE_CI=ON")
|
||||
|
||||
# Automatically tag the test with "gpu", "roctracer", "rocm-smi", and
# "rocprofiler" labels based on TEST_GPU and the entries of TEST_ENVIRONMENT.
#
# GPU tests get "gpu" unconditionally, plus "roctracer" / "rocm-smi" unless the
# corresponding backend is explicitly switched OFF in the test environment.
if(TEST_GPU)
    list(APPEND TEST_LABELS "gpu")

    if(NOT "OMNITRACE_USE_ROCTRACER=OFF" IN_LIST TEST_ENVIRONMENT)
        list(APPEND TEST_LABELS "roctracer")
    endif()

    if(NOT "OMNITRACE_USE_ROCM_SMI=OFF" IN_LIST TEST_ENVIRONMENT)
        list(APPEND TEST_LABELS "rocm-smi")
    endif()
endif()

# Any test that explicitly enables a backend gets the matching label. The
# "already labelled" guard must inspect TEST_LABELS: TEST_ENVIRONMENT only holds
# VAR=VALUE entries, so checking it for a bare label name is always false and
# would let the same label be appended twice (e.g. once by the TEST_GPU branch
# above and once here).
if("OMNITRACE_USE_ROCTRACER=ON" IN_LIST TEST_ENVIRONMENT AND NOT "roctracer" IN_LIST
                                                             TEST_LABELS)
    list(APPEND TEST_LABELS "roctracer")
endif()

if("OMNITRACE_USE_ROCM_SMI=ON" IN_LIST TEST_ENVIRONMENT AND NOT "rocm-smi" IN_LIST
                                                            TEST_LABELS)
    list(APPEND TEST_LABELS "rocm-smi")
endif()

if("OMNITRACE_USE_ROCPROFILER=ON" IN_LIST TEST_ENVIRONMENT
   AND NOT "rocprofiler" IN_LIST TEST_LABELS)
    list(APPEND TEST_LABELS "rocprofiler")
endif()
|
||||
|
||||
if(TARGET ${TEST_TARGET})
|
||||
if(DEFINED TEST_MPI
|
||||
AND ${TEST_MPI}
|
||||
|
||||
Reference in New Issue
Block a user