HIP API args in perfetto + new perfetto categories (#76)

* HIP API perfetto args + updated perfetto categories

- Support for HIP API args field in perfetto
- PERFETTO_CATEGORIES -> OMNITRACE_PERFETTO_CATEGORIES
- Changed perfetto categories for several trace events and trace counters
- migrated several TRACE_EVENT_* to use omnitrace::tracing::{push,pop}_perfetto_ts(...)

* Tweaked category_region to encode the type of args as well as value

- Affects MPI args field in perfetto

* Improved testing in ubuntu-focal.yml

- "Test Install" step sources setup-env.sh
- "Test Install" step tests python support
- "Test Install" step tests reading ~/.omnitrace.cfg
- Avoid installing boost and tbb libs when building from submodule

* validate-perfetto-proto.py accepts -m / --categories

* Remove reference from category_region typeids

* Tweak opensuse action name

* Tweak the "Test Install" Step of ubuntu-focal

[ROCm/rocprofiler-systems commit: 2e1fd5a3c4]
Этот коммит содержится в:
Jonathan R. Madsen
2022-06-29 16:26:02 -05:00
коммит произвёл GitHub
родитель e5df51c9f6
Коммит e292002235
12 изменённых файлов: 203 добавлений и 103 удалений
+1
Просмотреть файл
@@ -89,6 +89,7 @@ parse:
NAME: '*'
ARGS: '*'
PERFETTO_FILE: '*'
PERFETTO_METRIC: '*'
TIMEMORY_FILE: '*'
TIMEMORY_METRIC: '*'
rocm_version_message:
+1 -1
Просмотреть файл
@@ -1,4 +1,4 @@
name: OpenSUSE 15.2, 15.3 (Python)
name: OpenSUSE 15 (GCC, Python)
on:
push:
+23 -8
Просмотреть файл
@@ -28,15 +28,17 @@ jobs:
python: ['ON']
ompt: ['ON']
papi: ['ON']
deps: ['libtbb-dev libboost-{atomic,system,thread,date-time,filesystem,timer}-dev']
include:
- compiler: 'g++'
mpi: ''
boost: 'ON'
tbb: 'ON'
build-type: 'Release'
python: 'OFF'
python: 'ON'
ompt: 'OFF'
papi: 'OFF'
deps: ''
steps:
- uses: actions/checkout@v2
@@ -45,7 +47,7 @@ jobs:
timeout-minutes: 5
run:
sudo apt-get update &&
sudo apt-get install -y build-essential m4 autoconf libtool python3-pip libtbb-dev libboost-{atomic,system,thread,date-time,filesystem,timer}-dev clang libomp-dev ${{ matrix.compiler }} ${{ matrix.mpi }} &&
sudo apt-get install -y build-essential m4 autoconf libtool python3-pip ${{ matrix.deps }} clang libomp-dev ${{ matrix.compiler }} ${{ matrix.mpi }} &&
python3 -m pip install --upgrade pip &&
python3 -m pip install numpy &&
python3 -m pip install perfetto &&
@@ -55,8 +57,7 @@ jobs:
run:
echo "CC=$(echo '${{ matrix.compiler }}' | sed 's/+/c/g')" >> $GITHUB_ENV &&
echo "CXX=${{ matrix.compiler }}" >> $GITHUB_ENV &&
echo "/opt/omnitrace/bin:${HOME}/.local/bin" >> $GITHUB_PATH &&
echo "LD_LIBRARY_PATH=/opt/omnitrace/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
echo "${HOME}/.local/bin" >> $GITHUB_PATH
- name: Configure CMake
timeout-minutes: 10
@@ -103,25 +104,39 @@ jobs:
./omnitrace-*.sh --prefix=/opt/omnitrace --exclude-subdir --skip-license
- name: Test Install
timeout-minutes: 10
timeout-minutes: 15
run: |
set -v
export OMNITRACE_DEBUG=ON
source /opt/omnitrace/share/omnitrace/setup-env.sh
cat << EOF > ${HOME}/.omnitrace.cfg
OMNITRACE_VERBOSE = 2
OMNITRACE_USE_TIMEMORY = ON
OMNITRACE_USE_PERFETTO = ON
OMNITRACE_USE_SAMPLING = ON
OMNITRACE_USE_PROCESS_SAMPLING = ON
OMNITRACE_OUTPUT_PATH = omnitrace-tests-output
OMNITRACE_OUTPUT_PREFIX = %tag%/
OMNITRACE_SAMPLING_FREQ = 50
OMNITRACE_SAMPLING_DELAY = 0.05
EOF
which omnitrace-avail
ldd $(which omnitrace-avail)
omnitrace-avail --help
omnitrace-avail -a
which omnitrace-critical-trace
ldd $(which omnitrace-critical-trace)
which omnitrace-python
omnitrace-python --help
omnitrace-python -b -- ${{ github.workspace }}/examples/python/builtin.py
which omnitrace
ldd $(which omnitrace)
omnitrace --help
omnitrace -e -v 1 -o ls.inst --simulate -- ls
for i in omnitrace-ls.inst-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done
for i in omnitrace-tests-output/ls.inst/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done
omnitrace -e -v 1 -o ls.inst -- ls
./ls.inst
omnitrace -e -v 1 --simulate -- ls
for i in omnitrace-ls-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done
for i in omnitrace-tests-output/ls/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done
omnitrace -e -v 1 -- ls
- name: Test User API
+10 -10
Просмотреть файл
@@ -516,15 +516,15 @@ backtrace::post_process(int64_t _tid)
_last_bt = _bt;
_mean_ts += _ts;
TRACE_COUNTER("sampling",
TRACE_COUNTER("thread_peak_memory",
perfetto_counter_track<perfetto_rusage>::at(_tid, 0), _ts,
_bt->m_mem_peak / units::megabyte);
TRACE_COUNTER("sampling",
TRACE_COUNTER("thread_context_switch",
perfetto_counter_track<perfetto_rusage>::at(_tid, 1), _ts,
_bt->m_ctx_swch);
TRACE_COUNTER("sampling",
TRACE_COUNTER("thread_page_fault",
perfetto_counter_track<perfetto_rusage>::at(_tid, 2), _ts,
_bt->m_page_flt);
@@ -535,7 +535,7 @@ backtrace::post_process(int64_t _tid)
{
if(i < _bt->m_hw_counter.size())
{
TRACE_COUNTER("sampling",
TRACE_COUNTER("hardware_counter",
perfetto_counter_track<hw_counters>::at(_tid, i),
_ts, _bt->m_hw_counter.at(i));
}
@@ -547,15 +547,15 @@ backtrace::post_process(int64_t _tid)
{
auto _ts = pthread_create_gotcha::get_execution_time(_tid)->second;
uint64_t _zero = 0;
TRACE_COUNTER("sampling",
TRACE_COUNTER("thread_peak_memory",
perfetto_counter_track<perfetto_rusage>::at(_tid, 0), _ts,
_zero);
TRACE_COUNTER("sampling",
TRACE_COUNTER("thread_context_switch",
perfetto_counter_track<perfetto_rusage>::at(_tid, 1), _ts,
_zero);
TRACE_COUNTER("sampling",
TRACE_COUNTER("thread_page_fault",
perfetto_counter_track<perfetto_rusage>::at(_tid, 2), _ts,
_zero);
@@ -566,7 +566,7 @@ backtrace::post_process(int64_t _tid)
{
if(i < _last_bt->m_hw_counter.size())
{
TRACE_COUNTER("sampling",
TRACE_COUNTER("hardware_counter",
perfetto_counter_track<hw_counters>::at(_tid, i),
_ts, _zero);
}
@@ -601,10 +601,10 @@ backtrace::post_process(int64_t _tid)
auto _ts = _bt->m_ts;
if(!pthread_create_gotcha::is_valid_execution_time(_tid, _ts)) continue;
TRACE_EVENT_BEGIN("sampling",
TRACE_EVENT_BEGIN("hardware_counter",
perfetto::StaticString{ sitr.first->c_str() },
_last_wall_ts, "begin_ns", _last_wall_ts);
TRACE_EVENT_END("sampling", _ts, "end_ns", _ts);
TRACE_EVENT_END("hardware_counter", _ts, "end_ns", _ts);
}
_last_wall_ts = _bt->m_ts;
}
+3 -1
Просмотреть файл
@@ -145,7 +145,9 @@ category_region<CategoryT>::audit(const gotcha_data_t& _data, audit::incoming,
Args&&... _args)
{
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
start(_data.tool_id.c_str(), "args", JOIN(", ", _args...));
start(_data.tool_id.c_str(), "args",
JOIN(", ",
JOIN('=', tim::try_demangle<std::remove_reference_t<Args>>(), _args)...));
}
template <typename CategoryT>
+9 -1
Просмотреть файл
@@ -61,8 +61,12 @@ TIMEMORY_DECLARE_TYPE_TRAIT(name, typename Tp)
}
TIMEMORY_DEFINE_NS_API(category, host)
TIMEMORY_DEFINE_NS_API(category, device)
TIMEMORY_DEFINE_NS_API(category, user)
TIMEMORY_DEFINE_NS_API(category, device)
TIMEMORY_DEFINE_NS_API(category, device_hip)
TIMEMORY_DEFINE_NS_API(category, device_hsa)
TIMEMORY_DEFINE_NS_API(category, rocm_hip)
TIMEMORY_DEFINE_NS_API(category, rocm_hsa)
TIMEMORY_DEFINE_NS_API(category, rocm_smi)
TIMEMORY_DEFINE_NS_API(category, kokkos)
TIMEMORY_DEFINE_NS_API(category, mpi)
@@ -73,7 +77,11 @@ TIMEMORY_DEFINE_NS_API(category, device_critical_trace)
TIMEMORY_DEFINE_NAME_TRAIT("host", category::host);
TIMEMORY_DEFINE_NAME_TRAIT("device", category::device);
TIMEMORY_DEFINE_NAME_TRAIT("device_hip", category::device_hip);
TIMEMORY_DEFINE_NAME_TRAIT("device_hsa", category::device_hsa);
TIMEMORY_DEFINE_NAME_TRAIT("user", category::user);
TIMEMORY_DEFINE_NAME_TRAIT("rocm_hip", category::rocm_hip);
TIMEMORY_DEFINE_NAME_TRAIT("rocm_hsa", category::rocm_hsa);
TIMEMORY_DEFINE_NAME_TRAIT("rocm_smi", category::rocm_smi);
TIMEMORY_DEFINE_NAME_TRAIT("sampling", category::sampling);
TIMEMORY_DEFINE_NAME_TRAIT("thread_sampling", category::thread_sampling);
+5 -4
Просмотреть файл
@@ -246,10 +246,11 @@ data::post_process(uint32_t _dev_id)
double _temp = itr.m_temp / 1.0e3;
double _power = itr.m_power / 1.0e6;
double _usage = itr.m_mem_usage / static_cast<double>(units::megabyte);
TRACE_COUNTER("rocm_smi", counter_track::at(_dev_id, 0), _ts, _busy);
TRACE_COUNTER("rocm_smi", counter_track::at(_dev_id, 1), _ts, _temp);
TRACE_COUNTER("rocm_smi", counter_track::at(_dev_id, 2), _ts, _power);
TRACE_COUNTER("rocm_smi", counter_track::at(_dev_id, 3), _ts, _usage);
TRACE_COUNTER("device_busy", counter_track::at(_dev_id, 0), _ts, _busy);
TRACE_COUNTER("device_temp", counter_track::at(_dev_id, 1), _ts, _temp);
TRACE_COUNTER("device_power", counter_track::at(_dev_id, 2), _ts, _power);
TRACE_COUNTER("device_memory_usage", counter_track::at(_dev_id, 3), _ts,
_usage);
}
};
+57 -24
Просмотреть файл
@@ -28,6 +28,7 @@
#include "library/runtime.hpp"
#include "library/sampling.hpp"
#include "library/thread_data.hpp"
#include "library/tracing.hpp"
#include <timemory/backends/cpu.hpp>
#include <timemory/backends/threading.hpp>
@@ -38,6 +39,8 @@
#include <cstdint>
#include <tuple>
#define HIP_PROF_HIP_API_STRING 1
#include <roctracer_ext.h>
#include <roctracer_hcc.h>
#include <roctracer_hip.h>
@@ -45,12 +48,36 @@
#define AMD_INTERNAL_BUILD 1
#include <roctracer_hsa.h>
#if __has_include(<hip/amd_detail/hip_prof_str.h>) || (defined(OMNITRACE_USE_HIP) && OMNITRACE_USE_HIP > 0)
# include <hip/amd_detail/hip_prof_str.h>
# define OMNITRACE_HIP_API_ARGS 1
#else
# define OMNITRACE_HIP_API_ARGS 0
#endif
TIMEMORY_DEFINE_API(roctracer)
namespace omnitrace
{
namespace api = tim::api;
namespace
{
/// Build the text used for the "args" annotation of a HIP API trace event.
///
/// When HIP API argument support is compiled in (OMNITRACE_HIP_API_ARGS > 0),
/// the string produced by hipApiString(id, data) is reduced to the text found
/// between its first '(' and its last ')'. If the string is empty, or no such
/// well-ordered pair of parentheses exists, the string is returned unmodified.
///
/// When HIP API argument support is not compiled in, an empty string is
/// returned.
///
/// @param id    Identifier of the HIP API function.
/// @param data  Callback data forwarded to hipApiString.
/// @return      The extracted argument text (possibly empty).
std::string
hip_api_string(hip_api_id_t id, const hip_api_data_t* data)
{
#if OMNITRACE_HIP_API_ARGS > 0
    // NOTE(review): ownership of the buffer returned by hipApiString is not
    // visible from here -- confirm whether the caller is expected to release it.
    std::string _v = hipApiString(id, data);
    if(_v.empty()) return _v;

    const auto _pbeg = _v.find('(');
    if(_pbeg == std::string::npos) return _v;

    const auto _pend = _v.find_last_of(')');
    if(_pend == std::string::npos || _pbeg >= _pend) return _v;

    // number of characters strictly between the two parentheses
    const auto _n = (_pend - _pbeg - 1);
    return _v.substr(_pbeg + 1, _n);
#else
    tim::consume_parameters(id, data);
    // Every path of a value-returning function must return: flowing off the
    // end here would be undefined behavior.
    return std::string{};
#endif
}
//
int64_t
get_clock_skew()
{
@@ -275,11 +302,12 @@ hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
if(get_use_perfetto())
{
TRACE_EVENT_BEGIN("device", perfetto::StaticString{ _name },
static_cast<uint64_t>(begin_timestamp), "begin_ns",
static_cast<uint64_t>(begin_timestamp));
TRACE_EVENT_END("device", static_cast<uint64_t>(end_timestamp),
"end_ns", static_cast<uint64_t>(end_timestamp));
uint64_t _beg_ts = begin_timestamp;
uint64_t _end_ts = end_timestamp;
tracing::push_perfetto_ts(category::rocm_hsa{}, _name, _beg_ts,
"begin_ns", _beg_ts);
tracing::pop_perfetto_ts(category::rocm_hsa{}, _name, _end_ts,
"end_ns", _end_ts);
}
if(get_use_timemory())
@@ -334,6 +362,9 @@ hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg)
default: break;
}
OMNITRACE_CI_FAIL(_name == nullptr, "Error! HSA operation type not handled: %u\n",
op);
if(!_name) return;
auto _beg_ns = record->begin_ns + get_clock_skew();
@@ -347,11 +378,10 @@ hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg)
if(get_use_perfetto())
{
TRACE_EVENT_BEGIN("device", perfetto::StaticString{ *_name },
static_cast<uint64_t>(_beg_ns), "begin_ns",
static_cast<uint64_t>(_beg_ns));
TRACE_EVENT_END("device", static_cast<uint64_t>(_end_ns), "end_ns",
static_cast<uint64_t>(_end_ns));
uint64_t _beg = _beg_ns;
uint64_t _end = _end_ns;
tracing::push_perfetto_ts(category::device_hsa{}, *_name, _beg, "begin_ns", _beg);
tracing::pop_perfetto_ts(category::device_hsa{}, *_name, _end, "end_ns", _end);
}
auto _func = [_beg_ns, _end_ns, _name]() {
@@ -582,11 +612,12 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
if(get_use_perfetto())
{
TRACE_EVENT_BEGIN(
"host", perfetto::StaticString{ op_name }, static_cast<uint64_t>(_ts),
perfetto::Flow::ProcessScoped(_cid), "begin_ns",
static_cast<uint64_t>(_ts), "pcid", _parent_cid, "cid", _cid, "device",
_device_id, "tid", _tid, "depth", _depth, "corr_id", _corr_id);
auto _api_id = static_cast<hip_api_id_t>(cid);
tracing::push_perfetto_ts(
category::rocm_hip{}, op_name, _ts, perfetto::Flow::ProcessScoped(_cid),
"begin_ns", static_cast<uint64_t>(_ts), "pcid", _parent_cid, "cid", _cid,
"device", _device_id, "tid", _tid, "depth", _depth, "corr_id", _corr_id,
"args", hip_api_string(_api_id, data));
}
if(get_use_timemory())
{
@@ -622,8 +653,8 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
if(get_use_perfetto())
{
TRACE_EVENT_END("host", static_cast<uint64_t>(_ts), "end_ns",
static_cast<uint64_t>(_ts));
tracing::pop_perfetto_ts(category::rocm_hip{}, op_name, _ts, "end_ns",
static_cast<uint64_t>(_ts));
}
if(get_use_timemory())
{
@@ -779,14 +810,16 @@ hip_activity_callback(const char* begin, const char* end, void*)
_kernel_names.emplace(_name, tim::demangle(_name));
assert(_end_ns > _beg_ns);
TRACE_EVENT_BEGIN("device",
perfetto::StaticString{ _kernel_names.at(_name).c_str() },
_beg_ns, perfetto::Flow::ProcessScoped(_cid), "begin_ns",
_beg_ns, "corr_id", record->correlation_id, "device",
_devid, "queue", _queid, "op", _op_id_names.at(record->op));
TRACE_EVENT_END("device", _end_ns, "end_ns", _end_ns);
tracing::push_perfetto_ts(
category::device_hip{}, _kernel_names.at(_name).c_str(), _beg_ns,
perfetto::Flow::ProcessScoped(_cid), "begin_ns", _beg_ns, "corr_id",
record->correlation_id, "device", _devid, "queue", _queid, "op",
_op_id_names.at(record->op));
tracing::pop_perfetto_ts(category::device_hip{}, "", _end_ns, "end_ns",
_end_ns);
// for some reason, this is necessary to make sure very last one ends
TRACE_EVENT_END("device", _end_ns, "end_ns", _end_ns);
tracing::pop_perfetto_ts(category::device_hip{}, "", _end_ns, "end_ns",
_end_ns);
}
if(_critical_trace)
+20 -3
Просмотреть файл
@@ -80,7 +80,24 @@ void init_perfetto_counter_tracks(type_list<Types...>)
(perfetto_counter_track<Types>::init(), ...);
}
} // namespace
} // namespace cpu_freq
} // namespace omnitrace
TIMEMORY_DEFINE_NAME_TRAIT("cpu_freq", omnitrace::cpu_freq::cpu_freq);
TIMEMORY_DEFINE_NAME_TRAIT("process_page_fault", omnitrace::cpu_freq::cpu_page);
TIMEMORY_DEFINE_NAME_TRAIT("process_virtual_memory", omnitrace::cpu_freq::cpu_virt);
TIMEMORY_DEFINE_NAME_TRAIT("process_context_switch",
omnitrace::cpu_freq::cpu_context_switch);
TIMEMORY_DEFINE_NAME_TRAIT("process_page_fault", omnitrace::cpu_freq::cpu_page_fault);
TIMEMORY_DEFINE_NAME_TRAIT("process_user_cpu_time",
omnitrace::cpu_freq::cpu_user_mode_time);
TIMEMORY_DEFINE_NAME_TRAIT("process_kernel_cpu_time",
omnitrace::cpu_freq::cpu_kernel_mode_time);
namespace omnitrace
{
namespace cpu_freq
{
void
setup()
{
@@ -230,7 +247,7 @@ void
write_perfetto_counter_track(Args... _args)
{
using track = perfetto_counter_track<Tp>;
TRACE_COUNTER("sampling", track::at(0, 0), _args...);
TRACE_COUNTER(trait::name<Tp>::value, track::at(0, 0), _args...);
}
} // namespace
@@ -254,11 +271,11 @@ post_process()
uint64_t _ts = std::get<0>(itr);
double _freq = std::get<7>(itr).at(_offset);
if(!pthread_create_gotcha::is_valid_execution_time(0, _ts)) continue;
TRACE_COUNTER("sampling", freq_track::at(_idx, 0), _ts, _freq);
TRACE_COUNTER("cpu_freq", freq_track::at(_idx, 0), _ts, _freq);
}
auto _end_ts = pthread_create_gotcha::get_execution_time(0)->second;
TRACE_COUNTER("sampling", freq_track::at(_idx, 0), _end_ts, 0);
TRACE_COUNTER("cpu_freq", freq_track::at(_idx, 0), _end_ts, 0);
};
auto _process_cpu_rusage = []() {
+63 -47
Просмотреть файл
@@ -24,56 +24,72 @@
#include "library/defines.hpp"
#if defined(PERFETTO_CATEGORIES)
# error "PERFETTO_CATEGORIES is already defined. Please include \"" __FILE__ "\" before including any timemory files"
#if defined(OMNITRACE_PERFETTO_CATEGORIES)
# error "OMNITRACE_PERFETTO_CATEGORIES is already defined. Please include \"" __FILE__ "\" before including any timemory files"
#endif
#if !defined(TIMEMORY_USE_PERFETTO)
# include <perfetto.h>
# define PERFETTO_CATEGORIES \
perfetto::Category("host").SetDescription("Host-side function tracing"), \
perfetto::Category("device").SetDescription("Device-side function tracing"), \
perfetto::Category("user").SetDescription("User-defined regions"), \
perfetto::Category("rocm_smi").SetDescription("Device-level metrics"), \
perfetto::Category("sampling") \
.SetDescription("Metrics derived from sampling"), \
perfetto::Category("thread_sampling") \
.SetDescription("Metrics derived from background thread sampling"), \
perfetto::Category("mpi").SetDescription("MPI regions"), \
perfetto::Category("kokkos").SetDescription("Kokkos regions"), \
perfetto::Category("ompt").SetDescription("OpenMP Tools regions"), \
perfetto::Category("critical-trace") \
.SetDescription("Combined critical traces"), \
perfetto::Category("host-critical-trace") \
.SetDescription("Host-side critical traces"), \
perfetto::Category("device-critical-trace") \
.SetDescription("Device-side critical traces")
// Comma-separated list of every perfetto category used by omnitrace. Each entry
// pairs a category name with a human-readable description.
//
// - "device_hip" / "device_hsa" describe device-side work submitted through the
//   HIP and HSA APIs, respectively.
// - "process_page_fault" is listed exactly once: a second entry with the same
//   name and description adds nothing to the list.
#define OMNITRACE_PERFETTO_CATEGORIES                                                    \
    perfetto::Category("host").SetDescription("Host-side function tracing"),             \
    perfetto::Category("user").SetDescription("User-defined regions"),                   \
    perfetto::Category("device_hip")                                                     \
        .SetDescription("Device-side functions submitted via HIP API"),                  \
    perfetto::Category("device_hsa")                                                     \
        .SetDescription("Device-side functions submitted via HSA API"),                  \
    perfetto::Category("rocm_hip").SetDescription("Host-side HIP functions"),            \
    perfetto::Category("rocm_hsa").SetDescription("Host-side HSA functions"),            \
    perfetto::Category("device_busy")                                                    \
        .SetDescription("Busy percentage of a GPU device"),                              \
    perfetto::Category("device_temp")                                                    \
        .SetDescription("Temperature of GPU device in degC"),                            \
    perfetto::Category("device_power")                                                   \
        .SetDescription("Power consumption of GPU device in watts"),                     \
    perfetto::Category("device_memory_usage")                                            \
        .SetDescription("Memory usage of GPU device in MB"),                             \
    perfetto::Category("thread_peak_memory")                                             \
        .SetDescription(                                                                 \
            "Peak memory usage on thread in MB (derived from sampling)"),                \
    perfetto::Category("thread_context_switch")                                          \
        .SetDescription("Context switches on thread (derived from sampling)"),           \
    perfetto::Category("thread_page_fault")                                              \
        .SetDescription("Memory page faults on thread (derived from sampling)"),         \
    perfetto::Category("hardware_counter")                                               \
        .SetDescription("Hardware counter value on thread (derived from sampling)"),     \
    perfetto::Category("cpu_freq")                                                       \
        .SetDescription("CPU frequency in MHz (collected in background thread)"),        \
    perfetto::Category("process_page_fault")                                             \
        .SetDescription(                                                                 \
            "Memory page faults in process (collected in background thread)"),           \
    perfetto::Category("process_virtual_memory")                                         \
        .SetDescription("Virtual memory usage in process in MB (collected in "           \
                        "background thread)"),                                           \
    perfetto::Category("process_context_switch")                                         \
        .SetDescription(                                                                 \
            "Context switches in process (collected in background thread)"),             \
    perfetto::Category("process_user_cpu_time")                                          \
        .SetDescription("CPU time of functions executing in user-space in process "      \
                        "in seconds (collected in background thread)"),                  \
    perfetto::Category("process_kernel_cpu_time")                                        \
        .SetDescription("CPU time of functions executing in kernel-space in "            \
                        "process in seconds (collected in background thread)"),          \
    perfetto::Category("mpi").SetDescription("MPI regions"),                             \
    perfetto::Category("kokkos").SetDescription("Kokkos regions"),                       \
    perfetto::Category("ompt").SetDescription("OpenMP Tools regions"),                   \
    perfetto::Category("critical-trace").SetDescription("Combined critical traces"),     \
    perfetto::Category("host-critical-trace")                                            \
        .SetDescription("Host-side critical traces"),                                    \
    perfetto::Category("device-critical-trace")                                          \
        .SetDescription("Device-side critical traces"),                                  \
    perfetto::Category("timemory").SetDescription("Events from the timemory API")
#if defined(TIMEMORY_USE_PERFETTO)
# define TIMEMORY_PERFETTO_CATEGORIES OMNITRACE_PERFETTO_CATEGORIES
# include <timemory/components/perfetto/backends.hpp>
#else
# define PERFETTO_CATEGORIES \
perfetto::Category("host").SetDescription("Host-side function tracing"), \
perfetto::Category("device").SetDescription("Device-side function tracing"), \
perfetto::Category("user").SetDescription("User-defined regions"), \
perfetto::Category("rocm_smi").SetDescription("Device-level metrics"), \
perfetto::Category("sampling") \
.SetDescription("Metrics derived from sampling"), \
perfetto::Category("thread_sampling") \
.SetDescription("Metrics derived from background thread sampling"), \
perfetto::Category("mpi").SetDescription("MPI regions"), \
perfetto::Category("kokkos").SetDescription("Kokkos regions"), \
perfetto::Category("ompt").SetDescription("OpenMP Tools regions"), \
perfetto::Category("critical-trace") \
.SetDescription("Combined critical traces"), \
perfetto::Category("host-critical-trace") \
.SetDescription("Host-side critical traces"), \
perfetto::Category("device-critical-trace") \
.SetDescription("Device-side critical traces"), \
perfetto::Category("timemory") \
.SetDescription("Events from the timemory API")
# define TIMEMORY_PERFETTO_CATEGORIES PERFETTO_CATEGORIES
#endif
#if !defined(TIMEMORY_USE_PERFETTO)
PERFETTO_DEFINE_CATEGORIES(PERFETTO_CATEGORIES);
# include <perfetto.h>
PERFETTO_DEFINE_CATEGORIES(OMNITRACE_PERFETTO_CATEGORIES);
#endif
#include "library/debug.hpp"
+5 -3
Просмотреть файл
@@ -1018,8 +1018,9 @@ foreach(_VERSION ${OMNITRACE_PYTHON_VERSIONS})
endif()
function(OMNITRACE_ADD_PYTHON_VALIDATION_TEST)
cmake_parse_arguments(TEST "" "NAME;TIMEMORY_METRIC;TIMEMORY_FILE;PERFETTO_FILE"
"ARGS" ${ARGN})
cmake_parse_arguments(
TEST "" "NAME;TIMEMORY_METRIC;TIMEMORY_FILE;PERFETTO_METRIC;PERFETTO_FILE"
"ARGS" ${ARGN})
omnitrace_add_python_test(
NAME ${TEST_NAME}-validate-timemory
@@ -1037,7 +1038,7 @@ foreach(_VERSION ${OMNITRACE_PYTHON_VERSIONS})
NAME ${TEST_NAME}-validate-perfetto
COMMAND
${_PYTHON_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/validate-perfetto-proto.py
${TEST_ARGS} -p -i
-m ${TEST_PERFETTO_METRIC} ${TEST_ARGS} -p -i
PYTHON_VERSION ${_VERSION}
FILE omnitrace-tests-output/${TEST_NAME}/${_VERSION}/${TEST_PERFETTO_FILE}
DEPENDS ${TEST_NAME}-${_VERSION}
@@ -1050,6 +1051,7 @@ foreach(_VERSION ${OMNITRACE_PYTHON_VERSIONS})
NAME python-source
TIMEMORY_METRIC "trip_count"
TIMEMORY_FILE "trip_count.json"
PERFETTO_METRIC "host;user"
PERFETTO_FILE "perfetto-trace.proto"
ARGS -l
run
+6 -1
Просмотреть файл
@@ -40,6 +40,9 @@ if __name__ == "__main__":
parser.add_argument(
"-d", "--depths", nargs="+", type=int, help="Expected depths", default=[]
)
parser.add_argument(
"-m", "--categories", nargs="+", help="Perfetto categories", default=[]
)
parser.add_argument(
"-p", "--print", action="store_true", help="Print the processed perfetto data"
)
@@ -55,9 +58,11 @@ if __name__ == "__main__":
tp = TraceProcessor(trace=(args.input))
pdata = {}
# get data from perfetto
qr_it = tp.query("SELECT name, depth FROM slice")
qr_it = tp.query("SELECT name, depth, category FROM slice")
# loop over data rows from perfetto
for row in qr_it:
if args.categories and row.category not in args.categories:
continue
if row.name not in pdata:
pdata[row.name] = {}
if row.depth not in pdata[row.name]: