Roctracer perfetto flow fixes (#267)

* testing label updates

- automatically add "gpu", "roctracer", "rocm-smi", and "rocprofiler" test labels when appropriate

* Bump version to v1.9.1

* roctracer and config updates

- fix perfetto::Flow
  - use roctracer correlation ID instead of critical trace correlation ID
- renamed ambiguous _cid, _parent_cid, _corr_id variables to _crit_cid, _parent_crit_cid, _roct_cid
- use atomic_{mutex,lock} instead of STL mutex/lock
- support for individual perfetto annotations for HIP API args
- OMNITRACE_PERFETTO_COMPACT_ROCTRACER_ANNOTATIONS option for controlling compact vs. individual perfetto annotations for HIP API args

* Update timemory submodule

- argparser updates
  - help prints to std::cout by default now
  - supports setting custom ostream

* cmake formatting

* config::get_setting_value updates

- config::get_setting_value returns std::optional instead of std::pair<bool, Tp>
This commit is contained in:
Jonathan R. Madsen
2023-03-23 01:13:12 -05:00
committed by GitHub
parent 9eafb23602
commit 279a8e0952
11 changed files with 173 additions and 89 deletions
+1 -1
View File
@@ -1 +1 @@
1.9.0
1.9.1
+14 -6
View File
@@ -406,11 +406,7 @@ configure_settings(bool _init)
"durations are needed, see OMNITRACE_TRACE_PERIODS.",
0.0, "trace", "profile", "perfetto", "timemory");
auto _clock_s =
config::get_setting_value<std::string>("OMNITRACE_TRACE_PERIOD_CLOCK_ID").second;
auto _clock_choices = std::vector<std::string>{};
for(const auto& itr : constraint::get_valid_clock_ids())
{
_clock_choices.emplace_back(
@@ -696,6 +692,18 @@ configure_settings(bool _init)
"feature may dramatically reduce the size of the trace",
true, "perfetto", "data", "debugging", "advanced");
OMNITRACE_CONFIG_SETTING(
bool, "OMNITRACE_PERFETTO_COMPACT_ROCTRACER_ANNOTATIONS",
"When PERFETTO_ANNOTATIONS, USE_ROCTRACER, and ROCTRACER_HIP_API are all "
"enabled, enabling this option will result in the arg information for HIP API "
"calls to all be within one annotation (e.g., args=\"stream=0x0, dst=0x1F, "
"sizeBytes=64, src=0x08, kind=1\"). When disabled, each parameter will be an "
"individual annotation (e.g. stream, dst, sizeBytes, etc.). The benefit of the "
"former is that it is faster to serialize and consumes less file space; the "
"benefit of the latter is that it becomes much easier to find slices in the "
"trace with the same value",
false, "perfetto", "data", "debugging", "roctracer", "rocm", "advanced");
OMNITRACE_CONFIG_SETTING(
uint64_t, "OMNITRACE_THREAD_POOL_SIZE",
"Max number of threads for processing background tasks",
@@ -1095,7 +1103,7 @@ configure_mode_settings(const std::shared_ptr<settings>& _config)
}
else
{
bool _changed = get_setting_value<bool>(_name).second != _v;
bool _changed = get_setting_value<bool>(_name).value_or(!_v) != _v;
OMNITRACE_BASIC_VERBOSE(
1 && _changed,
"[configure_mode_settings] Overriding %s to %s in %s mode...\n",
@@ -1105,7 +1113,7 @@ configure_mode_settings(const std::shared_ptr<settings>& _config)
};
auto _use_causal = get_setting_value<bool>("OMNITRACE_USE_CAUSAL");
if(_use_causal.first && _use_causal.second) set_env("OMNITRACE_MODE", "causal", 1);
if(_use_causal && *_use_causal) set_env("OMNITRACE_MODE", "causal", 1);
if(get_mode() == Mode::Coverage)
{
+5 -5
View File
@@ -126,15 +126,15 @@ set_default_setting_value(const std::string& _name, Tp&& _v)
}
template <typename Tp>
std::pair<bool, Tp>
std::optional<Tp>
get_setting_value(const std::string& _name)
{
auto _instance = tim::settings::shared_instance();
if(!_instance) return std::make_pair(false, Tp{});
if(!_instance) return std::optional<Tp>{};
auto _setting = _instance->find(_name);
if(_setting == _instance->end() || !_setting->second)
return std::make_pair(false, Tp{});
return _setting->second->get<Tp>();
if(_setting == _instance->end() || !_setting->second) return std::optional<Tp>{};
auto&& _ret = _setting->second->get<Tp>();
return (_ret.first) ? std::optional<Tp>{ _ret.second } : std::optional<Tp>{};
}
//
+10 -7
View File
@@ -232,9 +232,10 @@ spec::spec(const std::string& _clock_id, double _delay, double _dur, uint64_t _n
{}
spec::spec(const std::string& _line)
: spec{ config::get_setting_value<std::string>("OMNITRACE_TRACE_PERIOD_CLOCK_ID").second,
config::get_setting_value<double>("OMNITRACE_TRACE_DELAY").second,
config::get_setting_value<double>("OMNITRACE_TRACE_DURATION").second }
: spec{ config::get_setting_value<std::string>("OMNITRACE_TRACE_PERIOD_CLOCK_ID")
.value_or("CLOCK_REALTIME"),
config::get_setting_value<double>("OMNITRACE_TRACE_DELAY").value_or(0.0),
config::get_setting_value<double>("OMNITRACE_TRACE_DURATION").value_or(0.0) }
{
auto _delim = tim::delimit(_line, ":");
if(!_delim.empty()) delay = utility::convert<double>(_delim.at(0));
@@ -300,12 +301,13 @@ get_trace_specs()
auto _v = std::vector<constraint::spec>{};
{
auto _delay_v = config::get_setting_value<double>("OMNITRACE_TRACE_DELAY").second;
auto _delay_v =
config::get_setting_value<double>("OMNITRACE_TRACE_DELAY").value_or(0.0);
auto _duration_v =
config::get_setting_value<double>("OMNITRACE_TRACE_DURATION").second;
config::get_setting_value<double>("OMNITRACE_TRACE_DURATION").value_or(0.0);
auto _clock_v = find_clock_identifier(
config::get_setting_value<std::string>("OMNITRACE_TRACE_PERIOD_CLOCK_ID")
.second);
.value_or("CLOCK_REALTIME"));
if(_delay_v > 0.0 || _duration_v > 0.0)
{
@@ -315,7 +317,8 @@ get_trace_specs()
{
auto _periods_v =
config::get_setting_value<std::string>("OMNITRACE_TRACE_PERIODS").second;
config::get_setting_value<std::string>("OMNITRACE_TRACE_PERIODS")
.value_or("");
if(!_periods_v.empty())
{
for(auto itr : tim::delimit(_periods_v, " ;\t\n"))
+5 -5
View File
@@ -96,8 +96,8 @@ auto&
get_engine()
{
static auto _seed = []() -> hash_value_t {
auto _seed_v =
config::get_setting_value<uint64_t>("OMNITRACE_CAUSAL_RANDOM_SEED").second;
auto _seed_v = config::get_setting_value<uint64_t>("OMNITRACE_CAUSAL_RANDOM_SEED")
.value_or(0);
if(_seed_v == 0) _seed_v = std::random_device{}();
return _seed_v;
}();
@@ -138,7 +138,7 @@ get_filters(std::set<binary::scope_filter::filter_scope> _scopes = {
bool _use_default_excludes =
config::get_setting_value<bool>("OMNITRACE_CAUSAL_FUNCTION_EXCLUDE_DEFAULTS")
.second;
.value_or(true);
if(_use_default_excludes && _scopes.count(sf::FUNCTION_FILTER) > 0)
{
@@ -471,9 +471,9 @@ perform_experiment_impl(std::shared_ptr<std::promise<void>> _started) // NOLINT
std::this_thread::sleep_for(std::chrono::milliseconds{ 10 });
double _delay_sec =
config::get_setting_value<double>("OMNITRACE_CAUSAL_DELAY").second;
config::get_setting_value<double>("OMNITRACE_CAUSAL_DELAY").value_or(0.0);
double _duration_sec =
config::get_setting_value<double>("OMNITRACE_CAUSAL_DURATION").second;
config::get_setting_value<double>("OMNITRACE_CAUSAL_DURATION").value_or(0.0);
auto _duration_nsec = duration_nsec_t{ _duration_sec * units::sec };
if(_delay_sec > 0.0)
@@ -493,7 +493,7 @@ experiment::save_experiments(std::string _fname_base, const filename_config_t& _
}
bool _causal_output_reset =
config::get_setting_value<bool>("OMNITRACE_CAUSAL_FILE_RESET").second;
config::get_setting_value<bool>("OMNITRACE_CAUSAL_FILE_RESET").value_or(false);
// if(current_record.experiments.empty()) return;
+2 -3
View File
@@ -222,9 +222,8 @@ post_process()
auto _get_setting = [](const std::string& _v) {
auto&& _b = config::get_setting_value<bool>(_v);
OMNITRACE_CI_THROW(!_b.first, "Error! No configuration setting named '%s'",
_v.c_str());
return (_b.first) ? _b.second : true;
OMNITRACE_CI_THROW(!_b, "Error! No configuration setting named '%s'", _v.c_str());
return _b.value_or(true);
};
auto _text_output = _get_setting("OMNITRACE_TEXT_OUTPUT");
+2 -2
View File
@@ -269,10 +269,10 @@ extern "C"
_name_len_limit = omnitrace::config::get_setting_value<int64_t>(
"OMNITRACE_KOKKOSP_NAME_LENGTH_MAX")
.second;
.value_or(_name_len_limit);
_kp_prefix =
omnitrace::config::get_setting_value<std::string>("OMNITRACE_KOKKOSP_PREFIX")
.second;
.value_or(_kp_prefix);
}
void kokkosp_finalize_library()
+98 -58
View File
@@ -22,8 +22,10 @@
#include "library/roctracer.hpp"
#include "core/components/fwd.hpp"
#include "core/concepts.hpp"
#include "core/config.hpp"
#include "core/debug.hpp"
#include "core/locking.hpp"
#include "library/components/category_region.hpp"
#include "library/critical_trace.hpp"
#include "library/runtime.hpp"
@@ -67,6 +69,14 @@ namespace omnitrace
{
namespace
{
/// Fetch the tim::type_mutex instance associated with `Tp` and `CategoryT`,
/// requesting locking::atomic_mutex as the mutex type.
///
/// @tparam Tp         type the mutex is keyed on
/// @tparam CategoryT  category tag, defaults to category::roctracer
/// @param  _n         instance selector, defaults to threading::get_id(); it is
///                    reduced modulo max_supported_threads before the lookup
/// @return reference to the selected mutex
template <typename Tp, typename CategoryT = category::roctracer>
auto&
roctracer_type_mutex(uint64_t _n = threading::get_id())
{
    // wrap the selector so it always addresses one of the available instances
    const auto _slot = _n % max_supported_threads;
    return tim::type_mutex<Tp, CategoryT, max_supported_threads, locking::atomic_mutex>(
        _slot);
}
std::string
hip_api_string(hip_api_id_t id, const hip_api_data_t* data)
{
@@ -163,8 +173,7 @@ using key_data_mutex_t = std::decay_t<decltype(get_roctracer_key_data())>;
auto&
get_hip_activity_mutex(int64_t _tid = threading::get_id())
{
return tim::type_mutex<hip_activity_mutex_t, category::roctracer,
max_supported_threads>(_tid);
return roctracer_type_mutex<hip_activity_mutex_t, category::roctracer>(_tid);
}
} // namespace
@@ -422,8 +431,8 @@ void
hip_exec_activity_callbacks(int64_t _tid)
{
// OMNITRACE_ROCTRACER_CALL(roctracer_flush_activity());
tim::auto_lock_t _lk{ get_hip_activity_mutex(_tid) };
auto& _async_ops = get_hip_activity_callbacks(_tid);
locking::atomic_lock _lk{ get_hip_activity_mutex(_tid) };
auto& _async_ops = get_hip_activity_callbacks(_tid);
if(!_async_ops) return;
for(auto& itr : *_async_ops)
{
@@ -434,7 +443,7 @@ hip_exec_activity_callbacks(int64_t _tid)
namespace
{
thread_local std::unordered_map<size_t, size_t> gpu_cids = {};
thread_local std::unordered_map<size_t, size_t> gpu_crit_cids = {};
}
void
@@ -449,7 +458,7 @@ roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
if(domain != ACTIVITY_DOMAIN_ROCTX) return;
static auto _range_map = std::unordered_map<roctx_range_id_t, std::string>{};
static auto _range_lock = std::mutex{};
static auto _range_lock = locking::atomic_mutex{};
const auto* _data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
static thread_local auto _range_stack = std::vector<std::string>{};
@@ -482,7 +491,7 @@ roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
case ROCTX_API_ID_roctxRangeStartA:
{
{
std::unique_lock<std::mutex> _lk{ _range_lock, std::defer_lock };
locking::atomic_lock _lk{ _range_lock, std::defer_lock };
if(!_lk.owns_lock()) _lk.lock();
_range_map.emplace(roctx_range_id_t{ _data->args.id },
std::string{ _data->args.message });
@@ -495,7 +504,7 @@ roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
{
std::string_view _message = {};
{
std::unique_lock<std::mutex> _lk{ _range_lock, std::defer_lock };
locking::atomic_lock _lk{ _range_lock, std::defer_lock };
if(!_lk.owns_lock()) _lk.lock();
auto itr = _range_map.find(roctx_range_id_t{ _data->args.id });
OMNITRACE_CI_THROW(itr == _range_map.end(),
@@ -571,13 +580,13 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
op_name, cid, data->correlation_id,
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
int64_t _ts = comp::wall_clock::record();
auto _tid = threading::get_id();
uint64_t _cid = 0;
uint64_t _parent_cid = 0;
uint32_t _depth = 0;
uintptr_t _queue = 0;
auto _corr_id = data->correlation_id;
int64_t _ts = comp::wall_clock::record();
auto _tid = threading::get_id();
uint64_t _crit_cid = 0;
uint64_t _parent_crit_cid = 0;
uint32_t _depth = 0;
uintptr_t _queue = 0;
auto _roct_cid = data->correlation_id;
#define OMNITRACE_HIP_API_QUEUE_CASE(API_FUNC, VARIABLE) \
case HIP_API_ID_##API_FUNC: \
@@ -713,37 +722,67 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
{
if(get_use_perfetto() || get_use_timemory() || get_use_rocm_smi())
{
tim::auto_lock_t _lk{ tim::type_mutex<key_data_mutex_t>() };
get_roctracer_key_data().emplace(_corr_id, _name);
get_roctracer_tid_data().emplace(_corr_id, _tid);
locking::atomic_lock _lk{ roctracer_type_mutex<key_data_mutex_t>() };
get_roctracer_key_data().emplace(_roct_cid, _name);
get_roctracer_tid_data().emplace(_roct_cid, _tid);
}
}
std::tie(_cid, _parent_cid, _depth) = create_cpu_cid_entry();
std::tie(_crit_cid, _parent_crit_cid, _depth) = create_cpu_cid_entry();
if(get_use_perfetto())
{
static auto _compact_annotations =
config::get_setting_value<bool>(
"OMNITRACE_PERFETTO_COMPACT_ROCTRACER_ANNOTATIONS")
.value_or(false);
auto _api_id = static_cast<hip_api_id_t>(cid);
tracing::push_perfetto_ts(
category::rocm_hip{}, op_name, _ts, ::perfetto::Flow::ProcessScoped(_cid),
category::rocm_hip{}, op_name, _ts,
::perfetto::Flow::ProcessScoped(_roct_cid),
[&](::perfetto::EventContext ctx) {
if(config::get_perfetto_annotations())
{
tracing::add_perfetto_annotation(ctx, "begin_ns", _ts);
tracing::add_perfetto_annotation(ctx, "pcid", _parent_cid);
tracing::add_perfetto_annotation(ctx, "cid", _crit_cid);
tracing::add_perfetto_annotation(ctx, "pcid", _parent_crit_cid);
tracing::add_perfetto_annotation(ctx, "device", _device_id);
tracing::add_perfetto_annotation(ctx, "tid", _tid);
tracing::add_perfetto_annotation(ctx, "depth", _depth);
tracing::add_perfetto_annotation(ctx, "corr_id", _corr_id);
tracing::add_perfetto_annotation(ctx, "args",
hip_api_string(_api_id, data));
tracing::add_perfetto_annotation(ctx, "corr_id", _roct_cid);
if(_compact_annotations)
{
tracing::add_perfetto_annotation(
ctx, "args", hip_api_string(_api_id, data));
}
else
{
auto _args = std::string{ hip_api_string(_api_id, data) };
if(!_args.empty())
{
for(auto itr : tim::delimit(_args, ","))
{
if(itr.empty()) continue;
auto _bpos = itr.find_first_not_of(' ');
auto _epos = itr.find_last_not_of(' ');
if(_epos > _bpos)
itr = itr.substr(_bpos, (_epos - _bpos) + 1);
auto _pos = itr.find('=');
if(_pos != std::string::npos)
tracing::add_perfetto_annotation(
ctx, itr.substr(0, _pos),
itr.substr(_pos + 1));
}
}
}
}
});
}
if(get_use_timemory())
{
auto itr = get_roctracer_hip_data()->emplace(
_corr_id, roctracer_hip_bundle_t{ op_name });
_roct_cid, roctracer_hip_bundle_t{ op_name });
if(itr.second)
{
itr.first->second.start();
@@ -757,12 +796,12 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
if(get_use_critical_trace() || get_use_rocm_smi())
{
add_critical_trace<Device::CPU, Phase::BEGIN>(
_tid, _cid, _corr_id, _parent_cid, _ts, 0, _device_id, _queue,
_tid, _crit_cid, _roct_cid, _parent_crit_cid, _ts, 0, _device_id, _queue,
critical_trace::add_hash_id(op_name), _depth);
}
get_roctracer_cid_data(_tid).emplace(
_corr_id, cid_data{ _cid, _parent_cid, _depth, _queue });
_roct_cid, cid_data{ _crit_cid, _parent_crit_cid, _depth, _queue });
hip_exec_activity_callbacks(_tid);
}
@@ -770,8 +809,8 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
{
hip_exec_activity_callbacks(_tid);
std::tie(_cid, _parent_cid, _depth, std::ignore) =
get_roctracer_cid_data(_tid).at(_corr_id);
std::tie(_crit_cid, _parent_crit_cid, _depth, std::ignore) =
get_roctracer_cid_data(_tid).at(_roct_cid);
if(get_use_perfetto())
{
@@ -785,9 +824,9 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
}
if(get_use_timemory())
{
auto _stop = [&_corr_id](int64_t _tid_v) {
auto _stop = [&_roct_cid](int64_t _tid_v) {
auto& _data = get_roctracer_hip_data(_tid_v);
auto itr = _data->find(_corr_id);
auto itr = _data->find(_roct_cid);
if(itr != get_roctracer_hip_data()->end())
{
itr->second.stop();
@@ -807,8 +846,8 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
if(get_use_critical_trace() || get_use_rocm_smi())
{
add_critical_trace<Device::CPU, Phase::END>(
_tid, _cid, _corr_id, _parent_cid, _ts, _ts, _device_id, _queue,
critical_trace::add_hash_id(op_name), _depth);
_tid, _crit_cid, _roct_cid, _parent_crit_cid, _ts, _ts, _device_id,
_queue, critical_trace::add_hash_id(op_name), _depth);
}
}
tim::consume_parameters(arg);
@@ -861,33 +900,33 @@ hip_activity_callback(const char* begin, const char* end, void* arg)
const char* op_name =
roctracer_op_string(record->domain, record->op, record->kind);
auto _ns_skew = get_clock_skew();
uint64_t _beg_ns = record->begin_ns + _ns_skew;
uint64_t _end_ns = record->end_ns + _ns_skew;
auto _corr_id = record->correlation_id;
auto _ns_skew = get_clock_skew();
uint64_t _beg_ns = record->begin_ns + _ns_skew;
uint64_t _end_ns = record->end_ns + _ns_skew;
auto _roct_cid = record->correlation_id;
auto& _keys = get_roctracer_key_data();
auto& _tids = get_roctracer_tid_data();
int16_t _depth = 0; // depth of kernel launch
int64_t _tid = 0; // thread id
uint64_t _cid = 0; // correlation id
uint64_t _pcid = 0; // parent corr_id
int32_t _devid = record->device_id; // device id
int64_t _queid = record->queue_id; // queue id
uintptr_t _queue = 0; // Host queue (stream)
auto _laps = _indexes[_corr_id]++; // see note #1
int16_t _depth = 0; // depth of kernel launch
int64_t _tid = 0; // thread id
uint64_t _crit_cid = 0; // correlation id
uint64_t _pcid = 0; // parent corr_id
int32_t _devid = record->device_id; // device id
int64_t _queid = record->queue_id; // queue id
uintptr_t _queue = 0; // Host queue (stream)
auto _laps = _indexes[_roct_cid]++; // see note #1
const char* _name = nullptr;
bool _found = false;
bool _critical_trace = get_use_critical_trace() || get_use_rocm_smi();
{
tim::auto_lock_t _lk{ tim::type_mutex<key_data_mutex_t>() };
if(_tids.find(_corr_id) != _tids.end())
locking::atomic_lock _lk{ roctracer_type_mutex<key_data_mutex_t>() };
if(_tids.find(_roct_cid) != _tids.end())
{
_found = true;
_tid = _tids.at(_corr_id);
auto itr = _keys.find(_corr_id);
_tid = _tids.at(_roct_cid);
auto itr = _keys.find(_roct_cid);
if(itr != _keys.end()) _name = itr->second;
}
}
@@ -897,9 +936,9 @@ hip_activity_callback(const char* begin, const char* end, void* arg)
if(_critical_trace)
{
auto& _cids = get_roctracer_cid_data(_tid);
if(_cids.find(_corr_id) != _cids.end())
std::tie(_cid, _pcid, _depth, _queue) = _cids.at(_corr_id);
auto& _crit_cids = get_roctracer_cid_data(_tid);
if(_crit_cids.find(_roct_cid) != _crit_cids.end())
std::tie(_crit_cid, _pcid, _depth, _queue) = _crit_cids.at(_roct_cid);
else
{
OMNITRACE_VERBOSE_F(3,
@@ -962,12 +1001,13 @@ hip_activity_callback(const char* begin, const char* end, void* arg)
assert(_end_ns >= _beg_ns);
tracing::push_perfetto_track(
category::device_hip{}, _kernel_names.at(_name).c_str(), _track, _beg_ns,
::perfetto::Flow::ProcessScoped(_cid), [&](::perfetto::EventContext ctx) {
::perfetto::Flow::ProcessScoped(_roct_cid),
[&](::perfetto::EventContext ctx) {
if(config::get_perfetto_annotations())
{
tracing::add_perfetto_annotation(ctx, "begin_ns", _beg_ns);
tracing::add_perfetto_annotation(ctx, "end_ns", _end_ns);
tracing::add_perfetto_annotation(ctx, "corr_id", _corr_id);
tracing::add_perfetto_annotation(ctx, "corr_id", _roct_cid);
tracing::add_perfetto_annotation(ctx, "device", _devid);
tracing::add_perfetto_annotation(ctx, "queue", _queid);
tracing::add_perfetto_annotation(ctx, "tid", _tid);
@@ -985,8 +1025,8 @@ hip_activity_callback(const char* begin, const char* end, void* arg)
auto _hash = critical_trace::add_hash_id(_name);
uint16_t _prio = _laps + 1; // priority
add_critical_trace<Device::GPU, Phase::DELTA, false>(
_tid, _cid, _corr_id, _cid, _beg_ns, _end_ns, _devid, _queid, _hash,
_depth + 1, _prio);
_tid, _crit_cid, _roct_cid, _crit_cid, _beg_ns, _end_ns, _devid, _queid,
_hash, _depth + 1, _prio);
}
if(_found && _name != nullptr && get_use_timemory())
@@ -1004,8 +1044,8 @@ hip_activity_callback(const char* begin, const char* end, void* arg)
_bundle.pop();
};
auto& _async_ops = get_hip_activity_callbacks(_tid);
tim::auto_lock_t _lk{ get_hip_activity_mutex(_tid) };
auto& _async_ops = get_hip_activity_callbacks(_tid);
locking::atomic_lock _lk{ get_hip_activity_mutex(_tid) };
_async_ops->emplace_back(std::move(_func));
}
}
+34
View File
@@ -351,6 +351,13 @@ function(OMNITRACE_ADD_TEST)
"${_KWARGS}"
${ARGN})
# For each listed prefix, default <PREFIX>_FAIL_REGEX to a pattern matching
# "### ERROR ###" or "address of faulting memory reference" when that variable
# is empty or unset; a caller-provided value is left untouched.
# NOTE(review): other parsed arguments in this function are read with a TEST_
# prefix (e.g. TEST_GPU, TEST_LABELS) -- confirm that the unprefixed
# <PREFIX>_FAIL_REGEX name is the one consumed further down.
foreach(_PREFIX PRELOAD RUNTIME REWRITE REWRITE_RUN BASELINE)
if("${${_PREFIX}_FAIL_REGEX}" STREQUAL "")
set(${_PREFIX}_FAIL_REGEX
"(### ERROR ###|address of faulting memory reference)")
endif()
endforeach()
if(TEST_GPU AND NOT _VALID_GPU)
omnitrace_message(STATUS
"${TEST_NAME} requires a GPU and no valid GPUs were found")
@@ -390,6 +397,33 @@ function(OMNITRACE_ADD_TEST)
list(APPEND TEST_ENVIRONMENT "OMNITRACE_CI=ON")
# Tests flagged with TEST_GPU always receive the "gpu" label. They additionally
# receive "roctracer" and "rocm-smi" unless TEST_ENVIRONMENT contains the
# corresponding OMNITRACE_USE_<BACKEND>=OFF entry (exact list-element match).
if(TEST_GPU)
list(APPEND TEST_LABELS "gpu")
if(NOT "OMNITRACE_USE_ROCTRACER=OFF" IN_LIST TEST_ENVIRONMENT)
list(APPEND TEST_LABELS "roctracer")
endif()
if(NOT "OMNITRACE_USE_ROCM_SMI=OFF" IN_LIST TEST_ENVIRONMENT)
list(APPEND TEST_LABELS "rocm-smi")
endif()
endif()
# Label tests whose environment explicitly enables a ROCm backend
# (OMNITRACE_USE_<BACKEND>=ON as an exact entry of TEST_ENVIRONMENT).
#
# The "already present" guard inspects TEST_LABELS, the list being appended to.
# Checking TEST_ENVIRONMENT for a bare label name never matches its KEY=VALUE
# entries, which let a label be appended a second time when it had already been
# added for a GPU test.
if("OMNITRACE_USE_ROCTRACER=ON" IN_LIST TEST_ENVIRONMENT
   AND NOT "roctracer" IN_LIST TEST_LABELS)
    list(APPEND TEST_LABELS "roctracer")
endif()
if("OMNITRACE_USE_ROCM_SMI=ON" IN_LIST TEST_ENVIRONMENT
   AND NOT "rocm-smi" IN_LIST TEST_LABELS)
    list(APPEND TEST_LABELS "rocm-smi")
endif()
if("OMNITRACE_USE_ROCPROFILER=ON" IN_LIST TEST_ENVIRONMENT
   AND NOT "rocprofiler" IN_LIST TEST_LABELS)
    list(APPEND TEST_LABELS "rocprofiler")
endif()
if(TARGET ${TEST_TARGET})
if(DEFINED TEST_MPI
AND ${TEST_MPI}