HIP API args in perfetto + new perfetto categories (#76)
* HIP API perfetto args + updated perfetto categories
- Support for HIP API args field in perfetto
- PERFETTO_CATEGORIES -> OMNITRACE_PERFETTO_CATEGORIES
- Changed perfetto categories for several trace events and trace counters
- migrated several TRACE_EVENT_* to use omnitrace::tracing::{push,pop}_perfetto_ts(...)
* Tweaked category_region to encode the type of args as well as value
- Affects MPI args field in perfetto
* Improved testing in ubuntu-focal.yml
- "Test Install" step sources setup-env.sh
- "Test Install" step tests python support
- "Test Install" step tests reading ~/.omnitrace.cfg
- Avoid installing boost and tbb libs when building from submodule
* validate-perfetto-proto.py accepts -m / --categories
* Remove reference from category_region typeids
* Tweak opensuse action name
* Tweak the "Test Install" Step of ubuntu-focal
[ROCm/rocprofiler-systems commit: 2e1fd5a3c4]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
e5df51c9f6
Коммит
e292002235
@@ -89,6 +89,7 @@ parse:
|
||||
NAME: '*'
|
||||
ARGS: '*'
|
||||
PERFETTO_FILE: '*'
|
||||
PERFETTO_METRIC: '*'
|
||||
TIMEMORY_FILE: '*'
|
||||
TIMEMORY_METRIC: '*'
|
||||
rocm_version_message:
|
||||
|
||||
+1
-1
@@ -1,4 +1,4 @@
|
||||
name: OpenSUSE 15.2, 15.3 (Python)
|
||||
name: OpenSUSE 15 (GCC, Python)
|
||||
|
||||
on:
|
||||
push:
|
||||
|
||||
+23
-8
@@ -28,15 +28,17 @@ jobs:
|
||||
python: ['ON']
|
||||
ompt: ['ON']
|
||||
papi: ['ON']
|
||||
deps: ['libtbb-dev libboost-{atomic,system,thread,date-time,filesystem,timer}-dev']
|
||||
include:
|
||||
- compiler: 'g++'
|
||||
mpi: ''
|
||||
boost: 'ON'
|
||||
tbb: 'ON'
|
||||
build-type: 'Release'
|
||||
python: 'OFF'
|
||||
python: 'ON'
|
||||
ompt: 'OFF'
|
||||
papi: 'OFF'
|
||||
deps: ''
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
@@ -45,7 +47,7 @@ jobs:
|
||||
timeout-minutes: 5
|
||||
run:
|
||||
sudo apt-get update &&
|
||||
sudo apt-get install -y build-essential m4 autoconf libtool python3-pip libtbb-dev libboost-{atomic,system,thread,date-time,filesystem,timer}-dev clang libomp-dev ${{ matrix.compiler }} ${{ matrix.mpi }} &&
|
||||
sudo apt-get install -y build-essential m4 autoconf libtool python3-pip ${{ matrix.deps }} clang libomp-dev ${{ matrix.compiler }} ${{ matrix.mpi }} &&
|
||||
python3 -m pip install --upgrade pip &&
|
||||
python3 -m pip install numpy &&
|
||||
python3 -m pip install perfetto &&
|
||||
@@ -55,8 +57,7 @@ jobs:
|
||||
run:
|
||||
echo "CC=$(echo '${{ matrix.compiler }}' | sed 's/+/c/g')" >> $GITHUB_ENV &&
|
||||
echo "CXX=${{ matrix.compiler }}" >> $GITHUB_ENV &&
|
||||
echo "/opt/omnitrace/bin:${HOME}/.local/bin" >> $GITHUB_PATH &&
|
||||
echo "LD_LIBRARY_PATH=/opt/omnitrace/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
|
||||
echo "${HOME}/.local/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Configure CMake
|
||||
timeout-minutes: 10
|
||||
@@ -103,25 +104,39 @@ jobs:
|
||||
./omnitrace-*.sh --prefix=/opt/omnitrace --exclude-subdir --skip-license
|
||||
|
||||
- name: Test Install
|
||||
timeout-minutes: 10
|
||||
timeout-minutes: 15
|
||||
run: |
|
||||
set -v
|
||||
export OMNITRACE_DEBUG=ON
|
||||
source /opt/omnitrace/share/omnitrace/setup-env.sh
|
||||
cat << EOF > ${HOME}/.omnitrace.cfg
|
||||
OMNITRACE_VERBOSE = 2
|
||||
OMNITRACE_USE_TIMEMORY = ON
|
||||
OMNITRACE_USE_PERFETTO = ON
|
||||
OMNITRACE_USE_SAMPLING = ON
|
||||
OMNITRACE_USE_PROCESS_SAMPLING = ON
|
||||
OMNITRACE_OUTPUT_PATH = omnitrace-tests-output
|
||||
OMNITRACE_OUTPUT_PREFIX = %tag%/
|
||||
OMNITRACE_SAMPLING_FREQ = 50
|
||||
OMNITRACE_SAMPLING_DELAY = 0.05
|
||||
EOF
|
||||
which omnitrace-avail
|
||||
ldd $(which omnitrace-avail)
|
||||
omnitrace-avail --help
|
||||
omnitrace-avail -a
|
||||
which omnitrace-critical-trace
|
||||
ldd $(which omnitrace-critical-trace)
|
||||
which omnitrace-python
|
||||
omnitrace-python --help
|
||||
omnitrace-python -b -- ${{ github.workspace }}/examples/python/builtin.py
|
||||
which omnitrace
|
||||
ldd $(which omnitrace)
|
||||
omnitrace --help
|
||||
omnitrace -e -v 1 -o ls.inst --simulate -- ls
|
||||
for i in omnitrace-ls.inst-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done
|
||||
for i in omnitrace-tests-output/ls.inst/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done
|
||||
omnitrace -e -v 1 -o ls.inst -- ls
|
||||
./ls.inst
|
||||
omnitrace -e -v 1 --simulate -- ls
|
||||
for i in omnitrace-ls-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done
|
||||
for i in omnitrace-tests-output/ls/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done
|
||||
omnitrace -e -v 1 -- ls
|
||||
|
||||
- name: Test User API
|
||||
|
||||
+10
-10
@@ -516,15 +516,15 @@ backtrace::post_process(int64_t _tid)
|
||||
_last_bt = _bt;
|
||||
_mean_ts += _ts;
|
||||
|
||||
TRACE_COUNTER("sampling",
|
||||
TRACE_COUNTER("thread_peak_memory",
|
||||
perfetto_counter_track<perfetto_rusage>::at(_tid, 0), _ts,
|
||||
_bt->m_mem_peak / units::megabyte);
|
||||
|
||||
TRACE_COUNTER("sampling",
|
||||
TRACE_COUNTER("thread_context_switch",
|
||||
perfetto_counter_track<perfetto_rusage>::at(_tid, 1), _ts,
|
||||
_bt->m_ctx_swch);
|
||||
|
||||
TRACE_COUNTER("sampling",
|
||||
TRACE_COUNTER("thread_page_fault",
|
||||
perfetto_counter_track<perfetto_rusage>::at(_tid, 2), _ts,
|
||||
_bt->m_page_flt);
|
||||
|
||||
@@ -535,7 +535,7 @@ backtrace::post_process(int64_t _tid)
|
||||
{
|
||||
if(i < _bt->m_hw_counter.size())
|
||||
{
|
||||
TRACE_COUNTER("sampling",
|
||||
TRACE_COUNTER("hardware_counter",
|
||||
perfetto_counter_track<hw_counters>::at(_tid, i),
|
||||
_ts, _bt->m_hw_counter.at(i));
|
||||
}
|
||||
@@ -547,15 +547,15 @@ backtrace::post_process(int64_t _tid)
|
||||
{
|
||||
auto _ts = pthread_create_gotcha::get_execution_time(_tid)->second;
|
||||
uint64_t _zero = 0;
|
||||
TRACE_COUNTER("sampling",
|
||||
TRACE_COUNTER("thread_peak_memory",
|
||||
perfetto_counter_track<perfetto_rusage>::at(_tid, 0), _ts,
|
||||
_zero);
|
||||
|
||||
TRACE_COUNTER("sampling",
|
||||
TRACE_COUNTER("thread_context_switch",
|
||||
perfetto_counter_track<perfetto_rusage>::at(_tid, 1), _ts,
|
||||
_zero);
|
||||
|
||||
TRACE_COUNTER("sampling",
|
||||
TRACE_COUNTER("thread_page_fault",
|
||||
perfetto_counter_track<perfetto_rusage>::at(_tid, 2), _ts,
|
||||
_zero);
|
||||
|
||||
@@ -566,7 +566,7 @@ backtrace::post_process(int64_t _tid)
|
||||
{
|
||||
if(i < _last_bt->m_hw_counter.size())
|
||||
{
|
||||
TRACE_COUNTER("sampling",
|
||||
TRACE_COUNTER("hardware_counter",
|
||||
perfetto_counter_track<hw_counters>::at(_tid, i),
|
||||
_ts, _zero);
|
||||
}
|
||||
@@ -601,10 +601,10 @@ backtrace::post_process(int64_t _tid)
|
||||
auto _ts = _bt->m_ts;
|
||||
if(!pthread_create_gotcha::is_valid_execution_time(_tid, _ts)) continue;
|
||||
|
||||
TRACE_EVENT_BEGIN("sampling",
|
||||
TRACE_EVENT_BEGIN("hardware_counter",
|
||||
perfetto::StaticString{ sitr.first->c_str() },
|
||||
_last_wall_ts, "begin_ns", _last_wall_ts);
|
||||
TRACE_EVENT_END("sampling", _ts, "end_ns", _ts);
|
||||
TRACE_EVENT_END("hardware_counter", _ts, "end_ns", _ts);
|
||||
}
|
||||
_last_wall_ts = _bt->m_ts;
|
||||
}
|
||||
|
||||
+3
-1
@@ -145,7 +145,9 @@ category_region<CategoryT>::audit(const gotcha_data_t& _data, audit::incoming,
|
||||
Args&&... _args)
|
||||
{
|
||||
OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal);
|
||||
start(_data.tool_id.c_str(), "args", JOIN(", ", _args...));
|
||||
start(_data.tool_id.c_str(), "args",
|
||||
JOIN(", ",
|
||||
JOIN('=', tim::try_demangle<std::remove_reference_t<Args>>(), _args)...));
|
||||
}
|
||||
|
||||
template <typename CategoryT>
|
||||
|
||||
@@ -61,8 +61,12 @@ TIMEMORY_DECLARE_TYPE_TRAIT(name, typename Tp)
|
||||
}
|
||||
|
||||
TIMEMORY_DEFINE_NS_API(category, host)
|
||||
TIMEMORY_DEFINE_NS_API(category, device)
|
||||
TIMEMORY_DEFINE_NS_API(category, user)
|
||||
TIMEMORY_DEFINE_NS_API(category, device)
|
||||
TIMEMORY_DEFINE_NS_API(category, device_hip)
|
||||
TIMEMORY_DEFINE_NS_API(category, device_hsa)
|
||||
TIMEMORY_DEFINE_NS_API(category, rocm_hip)
|
||||
TIMEMORY_DEFINE_NS_API(category, rocm_hsa)
|
||||
TIMEMORY_DEFINE_NS_API(category, rocm_smi)
|
||||
TIMEMORY_DEFINE_NS_API(category, kokkos)
|
||||
TIMEMORY_DEFINE_NS_API(category, mpi)
|
||||
@@ -73,7 +77,11 @@ TIMEMORY_DEFINE_NS_API(category, device_critical_trace)
|
||||
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("host", category::host);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("device", category::device);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("device_hip", category::device_hip);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("device_hsa", category::device_hsa);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("user", category::user);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("rocm_hip", category::rocm_hip);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("rocm_hsa", category::rocm_hsa);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("rocm_smi", category::rocm_smi);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("sampling", category::sampling);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("thread_sampling", category::thread_sampling);
|
||||
|
||||
+5
-4
@@ -246,10 +246,11 @@ data::post_process(uint32_t _dev_id)
|
||||
double _temp = itr.m_temp / 1.0e3;
|
||||
double _power = itr.m_power / 1.0e6;
|
||||
double _usage = itr.m_mem_usage / static_cast<double>(units::megabyte);
|
||||
TRACE_COUNTER("rocm_smi", counter_track::at(_dev_id, 0), _ts, _busy);
|
||||
TRACE_COUNTER("rocm_smi", counter_track::at(_dev_id, 1), _ts, _temp);
|
||||
TRACE_COUNTER("rocm_smi", counter_track::at(_dev_id, 2), _ts, _power);
|
||||
TRACE_COUNTER("rocm_smi", counter_track::at(_dev_id, 3), _ts, _usage);
|
||||
TRACE_COUNTER("device_busy", counter_track::at(_dev_id, 0), _ts, _busy);
|
||||
TRACE_COUNTER("device_temp", counter_track::at(_dev_id, 1), _ts, _temp);
|
||||
TRACE_COUNTER("device_power", counter_track::at(_dev_id, 2), _ts, _power);
|
||||
TRACE_COUNTER("device_memory_usage", counter_track::at(_dev_id, 3), _ts,
|
||||
_usage);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
+57
-24
@@ -28,6 +28,7 @@
|
||||
#include "library/runtime.hpp"
|
||||
#include "library/sampling.hpp"
|
||||
#include "library/thread_data.hpp"
|
||||
#include "library/tracing.hpp"
|
||||
|
||||
#include <timemory/backends/cpu.hpp>
|
||||
#include <timemory/backends/threading.hpp>
|
||||
@@ -38,6 +39,8 @@
|
||||
#include <cstdint>
|
||||
#include <tuple>
|
||||
|
||||
#define HIP_PROF_HIP_API_STRING 1
|
||||
|
||||
#include <roctracer_ext.h>
|
||||
#include <roctracer_hcc.h>
|
||||
#include <roctracer_hip.h>
|
||||
@@ -45,12 +48,36 @@
|
||||
#define AMD_INTERNAL_BUILD 1
|
||||
#include <roctracer_hsa.h>
|
||||
|
||||
#if __has_include(<hip/amd_detail/hip_prof_str.h>) || (defined(OMNITRACE_USE_HIP) && OMNITRACE_USE_HIP > 0)
|
||||
# include <hip/amd_detail/hip_prof_str.h>
|
||||
# define OMNITRACE_HIP_API_ARGS 1
|
||||
#else
|
||||
# define OMNITRACE_HIP_API_ARGS 0
|
||||
#endif
|
||||
|
||||
TIMEMORY_DEFINE_API(roctracer)
|
||||
namespace omnitrace
|
||||
{
|
||||
namespace api = tim::api;
|
||||
namespace
|
||||
{
|
||||
std::string
|
||||
hip_api_string(hip_api_id_t id, const hip_api_data_t* data)
|
||||
{
|
||||
#if OMNITRACE_HIP_API_ARGS > 0
|
||||
std::string _v = hipApiString(id, data);
|
||||
if(_v.empty()) return _v;
|
||||
auto _pbeg = _v.find('(');
|
||||
if(_pbeg == std::string::npos) return _v;
|
||||
auto _pend = _v.find_last_of(')');
|
||||
if(_pend == std::string::npos || _pbeg >= _pend) return _v;
|
||||
auto _n = (_pend - _pbeg - 1);
|
||||
return _v.substr(_pbeg + 1, _n);
|
||||
#else
|
||||
tim::consume_parameters(id, data);
|
||||
#endif
|
||||
}
|
||||
//
|
||||
int64_t
|
||||
get_clock_skew()
|
||||
{
|
||||
@@ -275,11 +302,12 @@ hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
TRACE_EVENT_BEGIN("device", perfetto::StaticString{ _name },
|
||||
static_cast<uint64_t>(begin_timestamp), "begin_ns",
|
||||
static_cast<uint64_t>(begin_timestamp));
|
||||
TRACE_EVENT_END("device", static_cast<uint64_t>(end_timestamp),
|
||||
"end_ns", static_cast<uint64_t>(end_timestamp));
|
||||
uint64_t _beg_ts = begin_timestamp;
|
||||
uint64_t _end_ts = end_timestamp;
|
||||
tracing::push_perfetto_ts(category::rocm_hsa{}, _name, _beg_ts,
|
||||
"begin_ns", _beg_ts);
|
||||
tracing::pop_perfetto_ts(category::rocm_hsa{}, _name, _end_ts,
|
||||
"end_ns", _end_ts);
|
||||
}
|
||||
|
||||
if(get_use_timemory())
|
||||
@@ -334,6 +362,9 @@ hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg)
|
||||
default: break;
|
||||
}
|
||||
|
||||
OMNITRACE_CI_FAIL(_name == nullptr, "Error! HSA operation type not handled: %u\n",
|
||||
op);
|
||||
|
||||
if(!_name) return;
|
||||
|
||||
auto _beg_ns = record->begin_ns + get_clock_skew();
|
||||
@@ -347,11 +378,10 @@ hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg)
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
TRACE_EVENT_BEGIN("device", perfetto::StaticString{ *_name },
|
||||
static_cast<uint64_t>(_beg_ns), "begin_ns",
|
||||
static_cast<uint64_t>(_beg_ns));
|
||||
TRACE_EVENT_END("device", static_cast<uint64_t>(_end_ns), "end_ns",
|
||||
static_cast<uint64_t>(_end_ns));
|
||||
uint64_t _beg = _beg_ns;
|
||||
uint64_t _end = _end_ns;
|
||||
tracing::push_perfetto_ts(category::device_hsa{}, *_name, _beg, "begin_ns", _beg);
|
||||
tracing::pop_perfetto_ts(category::device_hsa{}, *_name, _end, "end_ns", _end);
|
||||
}
|
||||
|
||||
auto _func = [_beg_ns, _end_ns, _name]() {
|
||||
@@ -582,11 +612,12 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
TRACE_EVENT_BEGIN(
|
||||
"host", perfetto::StaticString{ op_name }, static_cast<uint64_t>(_ts),
|
||||
perfetto::Flow::ProcessScoped(_cid), "begin_ns",
|
||||
static_cast<uint64_t>(_ts), "pcid", _parent_cid, "cid", _cid, "device",
|
||||
_device_id, "tid", _tid, "depth", _depth, "corr_id", _corr_id);
|
||||
auto _api_id = static_cast<hip_api_id_t>(cid);
|
||||
tracing::push_perfetto_ts(
|
||||
category::rocm_hip{}, op_name, _ts, perfetto::Flow::ProcessScoped(_cid),
|
||||
"begin_ns", static_cast<uint64_t>(_ts), "pcid", _parent_cid, "cid", _cid,
|
||||
"device", _device_id, "tid", _tid, "depth", _depth, "corr_id", _corr_id,
|
||||
"args", hip_api_string(_api_id, data));
|
||||
}
|
||||
if(get_use_timemory())
|
||||
{
|
||||
@@ -622,8 +653,8 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void*
|
||||
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
TRACE_EVENT_END("host", static_cast<uint64_t>(_ts), "end_ns",
|
||||
static_cast<uint64_t>(_ts));
|
||||
tracing::pop_perfetto_ts(category::rocm_hip{}, op_name, _ts, "end_ns",
|
||||
static_cast<uint64_t>(_ts));
|
||||
}
|
||||
if(get_use_timemory())
|
||||
{
|
||||
@@ -779,14 +810,16 @@ hip_activity_callback(const char* begin, const char* end, void*)
|
||||
_kernel_names.emplace(_name, tim::demangle(_name));
|
||||
|
||||
assert(_end_ns > _beg_ns);
|
||||
TRACE_EVENT_BEGIN("device",
|
||||
perfetto::StaticString{ _kernel_names.at(_name).c_str() },
|
||||
_beg_ns, perfetto::Flow::ProcessScoped(_cid), "begin_ns",
|
||||
_beg_ns, "corr_id", record->correlation_id, "device",
|
||||
_devid, "queue", _queid, "op", _op_id_names.at(record->op));
|
||||
TRACE_EVENT_END("device", _end_ns, "end_ns", _end_ns);
|
||||
tracing::push_perfetto_ts(
|
||||
category::device_hip{}, _kernel_names.at(_name).c_str(), _beg_ns,
|
||||
perfetto::Flow::ProcessScoped(_cid), "begin_ns", _beg_ns, "corr_id",
|
||||
record->correlation_id, "device", _devid, "queue", _queid, "op",
|
||||
_op_id_names.at(record->op));
|
||||
tracing::pop_perfetto_ts(category::device_hip{}, "", _end_ns, "end_ns",
|
||||
_end_ns);
|
||||
// for some reason, this is necessary to make sure very last one ends
|
||||
TRACE_EVENT_END("device", _end_ns, "end_ns", _end_ns);
|
||||
tracing::pop_perfetto_ts(category::device_hip{}, "", _end_ns, "end_ns",
|
||||
_end_ns);
|
||||
}
|
||||
|
||||
if(_critical_trace)
|
||||
|
||||
@@ -80,7 +80,24 @@ void init_perfetto_counter_tracks(type_list<Types...>)
|
||||
(perfetto_counter_track<Types>::init(), ...);
|
||||
}
|
||||
} // namespace
|
||||
} // namespace cpu_freq
|
||||
} // namespace omnitrace
|
||||
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("cpu_freq", omnitrace::cpu_freq::cpu_freq);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("process_page_fault", omnitrace::cpu_freq::cpu_page);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("process_virtual_memory", omnitrace::cpu_freq::cpu_virt);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("process_context_switch",
|
||||
omnitrace::cpu_freq::cpu_context_switch);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("process_page_fault", omnitrace::cpu_freq::cpu_page_fault);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("process_user_cpu_time",
|
||||
omnitrace::cpu_freq::cpu_user_mode_time);
|
||||
TIMEMORY_DEFINE_NAME_TRAIT("process_kernel_cpu_time",
|
||||
omnitrace::cpu_freq::cpu_kernel_mode_time);
|
||||
|
||||
namespace omnitrace
|
||||
{
|
||||
namespace cpu_freq
|
||||
{
|
||||
void
|
||||
setup()
|
||||
{
|
||||
@@ -230,7 +247,7 @@ void
|
||||
write_perfetto_counter_track(Args... _args)
|
||||
{
|
||||
using track = perfetto_counter_track<Tp>;
|
||||
TRACE_COUNTER("sampling", track::at(0, 0), _args...);
|
||||
TRACE_COUNTER(trait::name<Tp>::value, track::at(0, 0), _args...);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
@@ -254,11 +271,11 @@ post_process()
|
||||
uint64_t _ts = std::get<0>(itr);
|
||||
double _freq = std::get<7>(itr).at(_offset);
|
||||
if(!pthread_create_gotcha::is_valid_execution_time(0, _ts)) continue;
|
||||
TRACE_COUNTER("sampling", freq_track::at(_idx, 0), _ts, _freq);
|
||||
TRACE_COUNTER("cpu_freq", freq_track::at(_idx, 0), _ts, _freq);
|
||||
}
|
||||
|
||||
auto _end_ts = pthread_create_gotcha::get_execution_time(0)->second;
|
||||
TRACE_COUNTER("sampling", freq_track::at(_idx, 0), _end_ts, 0);
|
||||
TRACE_COUNTER("cpu_freq", freq_track::at(_idx, 0), _end_ts, 0);
|
||||
};
|
||||
|
||||
auto _process_cpu_rusage = []() {
|
||||
|
||||
@@ -24,56 +24,72 @@
|
||||
|
||||
#include "library/defines.hpp"
|
||||
|
||||
#if defined(PERFETTO_CATEGORIES)
|
||||
# error "PERFETTO_CATEGORIES is already defined. Please include \"" __FILE__ "\" before including any timemory files"
|
||||
#if defined(OMNITRACE_PERFETTO_CATEGORIES)
|
||||
# error "OMNITRACE_PERFETTO_CATEGORIES is already defined. Please include \"" __FILE__ "\" before including any timemory files"
|
||||
#endif
|
||||
|
||||
#if !defined(TIMEMORY_USE_PERFETTO)
|
||||
# include <perfetto.h>
|
||||
# define PERFETTO_CATEGORIES \
|
||||
perfetto::Category("host").SetDescription("Host-side function tracing"), \
|
||||
perfetto::Category("device").SetDescription("Device-side function tracing"), \
|
||||
perfetto::Category("user").SetDescription("User-defined regions"), \
|
||||
perfetto::Category("rocm_smi").SetDescription("Device-level metrics"), \
|
||||
perfetto::Category("sampling") \
|
||||
.SetDescription("Metrics derived from sampling"), \
|
||||
perfetto::Category("thread_sampling") \
|
||||
.SetDescription("Metrics derived from background thread sampling"), \
|
||||
perfetto::Category("mpi").SetDescription("MPI regions"), \
|
||||
perfetto::Category("kokkos").SetDescription("Kokkos regions"), \
|
||||
perfetto::Category("ompt").SetDescription("OpenMP Tools regions"), \
|
||||
perfetto::Category("critical-trace") \
|
||||
.SetDescription("Combined critical traces"), \
|
||||
perfetto::Category("host-critical-trace") \
|
||||
.SetDescription("Host-side critical traces"), \
|
||||
perfetto::Category("device-critical-trace") \
|
||||
.SetDescription("Device-side critical traces")
|
||||
#define OMNITRACE_PERFETTO_CATEGORIES \
|
||||
perfetto::Category("host").SetDescription("Host-side function tracing"), \
|
||||
perfetto::Category("user").SetDescription("User-defined regions"), \
|
||||
perfetto::Category("device_hip") \
|
||||
.SetDescription("Device-side functions submitted via HSA API"), \
|
||||
perfetto::Category("device_hsa") \
|
||||
.SetDescription("Device-side functions submitted via HIP API"), \
|
||||
perfetto::Category("rocm_hip").SetDescription("Host-side HIP functions"), \
|
||||
perfetto::Category("rocm_hsa").SetDescription("Host-side HSA functions"), \
|
||||
perfetto::Category("device_busy") \
|
||||
.SetDescription("Busy percentage of a GPU device"), \
|
||||
perfetto::Category("device_temp") \
|
||||
.SetDescription("Temperature of GPU device in degC"), \
|
||||
perfetto::Category("device_power") \
|
||||
.SetDescription("Power consumption of GPU device in watts"), \
|
||||
perfetto::Category("device_memory_usage") \
|
||||
.SetDescription("Memory usage of GPU device in MB"), \
|
||||
perfetto::Category("thread_peak_memory") \
|
||||
.SetDescription( \
|
||||
"Peak memory usage on thread in MB (derived from sampling)"), \
|
||||
perfetto::Category("thread_context_switch") \
|
||||
.SetDescription("Context switches on thread (derived from sampling)"), \
|
||||
perfetto::Category("thread_page_fault") \
|
||||
.SetDescription("Memory page faults on thread (derived from sampling)"), \
|
||||
perfetto::Category("hardware_counter") \
|
||||
.SetDescription("Hardware counter value on thread (derived from sampling)"), \
|
||||
perfetto::Category("cpu_freq") \
|
||||
.SetDescription("CPU frequency in MHz (collected in background thread)"), \
|
||||
perfetto::Category("process_page_fault") \
|
||||
.SetDescription( \
|
||||
"Memory page faults in process (collected in background thread)"), \
|
||||
perfetto::Category("process_virtual_memory") \
|
||||
.SetDescription("Virtual memory usage in process in MB (collected in " \
|
||||
"background thread)"), \
|
||||
perfetto::Category("process_context_switch") \
|
||||
.SetDescription( \
|
||||
"Context switches in process (collected in background thread)"), \
|
||||
perfetto::Category("process_page_fault") \
|
||||
.SetDescription( \
|
||||
"Memory page faults in process (collected in background thread)"), \
|
||||
perfetto::Category("process_user_cpu_time") \
|
||||
.SetDescription("CPU time of functions executing in user-space in process " \
|
||||
"in seconds (collected in background thread)"), \
|
||||
perfetto::Category("process_kernel_cpu_time") \
|
||||
.SetDescription("CPU time of functions executing in kernel-space in " \
|
||||
"process in seconds (collected in background thread)"), \
|
||||
perfetto::Category("mpi").SetDescription("MPI regions"), \
|
||||
perfetto::Category("kokkos").SetDescription("Kokkos regions"), \
|
||||
perfetto::Category("ompt").SetDescription("OpenMP Tools regions"), \
|
||||
perfetto::Category("critical-trace").SetDescription("Combined critical traces"), \
|
||||
perfetto::Category("host-critical-trace") \
|
||||
.SetDescription("Host-side critical traces"), \
|
||||
perfetto::Category("device-critical-trace") \
|
||||
.SetDescription("Device-side critical traces"), \
|
||||
perfetto::Category("timemory").SetDescription("Events from the timemory API")
|
||||
|
||||
#if defined(TIMEMORY_USE_PERFETTO)
|
||||
# define TIMEMORY_PERFETTO_CATEGORIES OMNITRACE_PERFETTO_CATEGORIES
|
||||
# include <timemory/components/perfetto/backends.hpp>
|
||||
#else
|
||||
# define PERFETTO_CATEGORIES \
|
||||
perfetto::Category("host").SetDescription("Host-side function tracing"), \
|
||||
perfetto::Category("device").SetDescription("Device-side function tracing"), \
|
||||
perfetto::Category("user").SetDescription("User-defined regions"), \
|
||||
perfetto::Category("rocm_smi").SetDescription("Device-level metrics"), \
|
||||
perfetto::Category("sampling") \
|
||||
.SetDescription("Metrics derived from sampling"), \
|
||||
perfetto::Category("thread_sampling") \
|
||||
.SetDescription("Metrics derived from background thread sampling"), \
|
||||
perfetto::Category("mpi").SetDescription("MPI regions"), \
|
||||
perfetto::Category("kokkos").SetDescription("Kokkos regions"), \
|
||||
perfetto::Category("ompt").SetDescription("OpenMP Tools regions"), \
|
||||
perfetto::Category("critical-trace") \
|
||||
.SetDescription("Combined critical traces"), \
|
||||
perfetto::Category("host-critical-trace") \
|
||||
.SetDescription("Host-side critical traces"), \
|
||||
perfetto::Category("device-critical-trace") \
|
||||
.SetDescription("Device-side critical traces"), \
|
||||
perfetto::Category("timemory") \
|
||||
.SetDescription("Events from the timemory API")
|
||||
# define TIMEMORY_PERFETTO_CATEGORIES PERFETTO_CATEGORIES
|
||||
#endif
|
||||
|
||||
#if !defined(TIMEMORY_USE_PERFETTO)
|
||||
PERFETTO_DEFINE_CATEGORIES(PERFETTO_CATEGORIES);
|
||||
# include <perfetto.h>
|
||||
PERFETTO_DEFINE_CATEGORIES(OMNITRACE_PERFETTO_CATEGORIES);
|
||||
#endif
|
||||
|
||||
#include "library/debug.hpp"
|
||||
|
||||
@@ -1018,8 +1018,9 @@ foreach(_VERSION ${OMNITRACE_PYTHON_VERSIONS})
|
||||
endif()
|
||||
|
||||
function(OMNITRACE_ADD_PYTHON_VALIDATION_TEST)
|
||||
cmake_parse_arguments(TEST "" "NAME;TIMEMORY_METRIC;TIMEMORY_FILE;PERFETTO_FILE"
|
||||
"ARGS" ${ARGN})
|
||||
cmake_parse_arguments(
|
||||
TEST "" "NAME;TIMEMORY_METRIC;TIMEMORY_FILE;PERFETTO_METRIC;PERFETTO_FILE"
|
||||
"ARGS" ${ARGN})
|
||||
|
||||
omnitrace_add_python_test(
|
||||
NAME ${TEST_NAME}-validate-timemory
|
||||
@@ -1037,7 +1038,7 @@ foreach(_VERSION ${OMNITRACE_PYTHON_VERSIONS})
|
||||
NAME ${TEST_NAME}-validate-perfetto
|
||||
COMMAND
|
||||
${_PYTHON_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/validate-perfetto-proto.py
|
||||
${TEST_ARGS} -p -i
|
||||
-m ${TEST_PERFETTO_METRIC} ${TEST_ARGS} -p -i
|
||||
PYTHON_VERSION ${_VERSION}
|
||||
FILE omnitrace-tests-output/${TEST_NAME}/${_VERSION}/${TEST_PERFETTO_FILE}
|
||||
DEPENDS ${TEST_NAME}-${_VERSION}
|
||||
@@ -1050,6 +1051,7 @@ foreach(_VERSION ${OMNITRACE_PYTHON_VERSIONS})
|
||||
NAME python-source
|
||||
TIMEMORY_METRIC "trip_count"
|
||||
TIMEMORY_FILE "trip_count.json"
|
||||
PERFETTO_METRIC "host;user"
|
||||
PERFETTO_FILE "perfetto-trace.proto"
|
||||
ARGS -l
|
||||
run
|
||||
|
||||
@@ -40,6 +40,9 @@ if __name__ == "__main__":
|
||||
parser.add_argument(
|
||||
"-d", "--depths", nargs="+", type=int, help="Expected depths", default=[]
|
||||
)
|
||||
parser.add_argument(
|
||||
"-m", "--categories", nargs="+", help="Perfetto categories", default=[]
|
||||
)
|
||||
parser.add_argument(
|
||||
"-p", "--print", action="store_true", help="Print the processed perfetto data"
|
||||
)
|
||||
@@ -55,9 +58,11 @@ if __name__ == "__main__":
|
||||
tp = TraceProcessor(trace=(args.input))
|
||||
pdata = {}
|
||||
# get data from perfetto
|
||||
qr_it = tp.query("SELECT name, depth FROM slice")
|
||||
qr_it = tp.query("SELECT name, depth, category FROM slice")
|
||||
# loop over data rows from perfetto
|
||||
for row in qr_it:
|
||||
if args.categories and row.category not in args.categories:
|
||||
continue
|
||||
if row.name not in pdata:
|
||||
pdata[row.name] = {}
|
||||
if row.depth not in pdata[row.name]:
|
||||
|
||||
Ссылка в новой задаче
Block a user