diff --git a/projects/rocprofiler-systems/.cmake-format.yaml b/projects/rocprofiler-systems/.cmake-format.yaml index 9aecde1712..da61c0e947 100644 --- a/projects/rocprofiler-systems/.cmake-format.yaml +++ b/projects/rocprofiler-systems/.cmake-format.yaml @@ -89,6 +89,7 @@ parse: NAME: '*' ARGS: '*' PERFETTO_FILE: '*' + PERFETTO_METRIC: '*' TIMEMORY_FILE: '*' TIMEMORY_METRIC: '*' rocm_version_message: diff --git a/projects/rocprofiler-systems/.github/workflows/opensuse.yml b/projects/rocprofiler-systems/.github/workflows/opensuse.yml index 38aad17ca9..c4dfd33160 100644 --- a/projects/rocprofiler-systems/.github/workflows/opensuse.yml +++ b/projects/rocprofiler-systems/.github/workflows/opensuse.yml @@ -1,4 +1,4 @@ -name: OpenSUSE 15.2, 15.3 (Python) +name: OpenSUSE 15 (GCC, Python) on: push: diff --git a/projects/rocprofiler-systems/.github/workflows/ubuntu-focal.yml b/projects/rocprofiler-systems/.github/workflows/ubuntu-focal.yml index 2c98ea21ab..e238b2a07a 100644 --- a/projects/rocprofiler-systems/.github/workflows/ubuntu-focal.yml +++ b/projects/rocprofiler-systems/.github/workflows/ubuntu-focal.yml @@ -28,15 +28,17 @@ jobs: python: ['ON'] ompt: ['ON'] papi: ['ON'] + deps: ['libtbb-dev libboost-{atomic,system,thread,date-time,filesystem,timer}-dev'] include: - compiler: 'g++' mpi: '' boost: 'ON' tbb: 'ON' build-type: 'Release' - python: 'OFF' + python: 'ON' ompt: 'OFF' papi: 'OFF' + deps: '' steps: - uses: actions/checkout@v2 @@ -45,7 +47,7 @@ jobs: timeout-minutes: 5 run: sudo apt-get update && - sudo apt-get install -y build-essential m4 autoconf libtool python3-pip libtbb-dev libboost-{atomic,system,thread,date-time,filesystem,timer}-dev clang libomp-dev ${{ matrix.compiler }} ${{ matrix.mpi }} && + sudo apt-get install -y build-essential m4 autoconf libtool python3-pip ${{ matrix.deps }} clang libomp-dev ${{ matrix.compiler }} ${{ matrix.mpi }} && python3 -m pip install --upgrade pip && python3 -m pip install numpy && python3 -m pip install perfetto && @@ 
-55,8 +57,7 @@ jobs: run: echo "CC=$(echo '${{ matrix.compiler }}' | sed 's/+/c/g')" >> $GITHUB_ENV && echo "CXX=${{ matrix.compiler }}" >> $GITHUB_ENV && - echo "/opt/omnitrace/bin:${HOME}/.local/bin" >> $GITHUB_PATH && - echo "LD_LIBRARY_PATH=/opt/omnitrace/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV + echo "${HOME}/.local/bin" >> $GITHUB_PATH - name: Configure CMake timeout-minutes: 10 @@ -103,25 +104,39 @@ jobs: ./omnitrace-*.sh --prefix=/opt/omnitrace --exclude-subdir --skip-license - name: Test Install - timeout-minutes: 10 + timeout-minutes: 15 run: | set -v - export OMNITRACE_DEBUG=ON + source /opt/omnitrace/share/omnitrace/setup-env.sh + cat << EOF > ${HOME}/.omnitrace.cfg + OMNITRACE_VERBOSE = 2 + OMNITRACE_USE_TIMEMORY = ON + OMNITRACE_USE_PERFETTO = ON + OMNITRACE_USE_SAMPLING = ON + OMNITRACE_USE_PROCESS_SAMPLING = ON + OMNITRACE_OUTPUT_PATH = omnitrace-tests-output + OMNITRACE_OUTPUT_PREFIX = %tag%/ + OMNITRACE_SAMPLING_FREQ = 50 + OMNITRACE_SAMPLING_DELAY = 0.05 + EOF which omnitrace-avail ldd $(which omnitrace-avail) omnitrace-avail --help omnitrace-avail -a which omnitrace-critical-trace ldd $(which omnitrace-critical-trace) + which omnitrace-python + omnitrace-python --help + omnitrace-python -b -- ${{ github.workspace }}/examples/python/builtin.py which omnitrace ldd $(which omnitrace) omnitrace --help omnitrace -e -v 1 -o ls.inst --simulate -- ls - for i in omnitrace-ls.inst-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done + for i in omnitrace-tests-output/ls.inst/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done omnitrace -e -v 1 -o ls.inst -- ls ./ls.inst omnitrace -e -v 1 --simulate -- ls - for i in omnitrace-ls-output/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done + for i in omnitrace-tests-output/ls/*; do echo -e "\n\n --> ${i} \n\n"; cat ${i}; done omnitrace -e -v 1 -- ls - name: Test User API diff --git a/projects/rocprofiler-systems/source/lib/omnitrace/library/components/backtrace.cpp 
b/projects/rocprofiler-systems/source/lib/omnitrace/library/components/backtrace.cpp index fa15a0352e..a4be543356 100644 --- a/projects/rocprofiler-systems/source/lib/omnitrace/library/components/backtrace.cpp +++ b/projects/rocprofiler-systems/source/lib/omnitrace/library/components/backtrace.cpp @@ -516,15 +516,15 @@ backtrace::post_process(int64_t _tid) _last_bt = _bt; _mean_ts += _ts; - TRACE_COUNTER("sampling", + TRACE_COUNTER("thread_peak_memory", perfetto_counter_track::at(_tid, 0), _ts, _bt->m_mem_peak / units::megabyte); - TRACE_COUNTER("sampling", + TRACE_COUNTER("thread_context_switch", perfetto_counter_track::at(_tid, 1), _ts, _bt->m_ctx_swch); - TRACE_COUNTER("sampling", + TRACE_COUNTER("thread_page_fault", perfetto_counter_track::at(_tid, 2), _ts, _bt->m_page_flt); @@ -535,7 +535,7 @@ backtrace::post_process(int64_t _tid) { if(i < _bt->m_hw_counter.size()) { - TRACE_COUNTER("sampling", + TRACE_COUNTER("hardware_counter", perfetto_counter_track::at(_tid, i), _ts, _bt->m_hw_counter.at(i)); } @@ -547,15 +547,15 @@ backtrace::post_process(int64_t _tid) { auto _ts = pthread_create_gotcha::get_execution_time(_tid)->second; uint64_t _zero = 0; - TRACE_COUNTER("sampling", + TRACE_COUNTER("thread_peak_memory", perfetto_counter_track::at(_tid, 0), _ts, _zero); - TRACE_COUNTER("sampling", + TRACE_COUNTER("thread_context_switch", perfetto_counter_track::at(_tid, 1), _ts, _zero); - TRACE_COUNTER("sampling", + TRACE_COUNTER("thread_page_fault", perfetto_counter_track::at(_tid, 2), _ts, _zero); @@ -566,7 +566,7 @@ backtrace::post_process(int64_t _tid) { if(i < _last_bt->m_hw_counter.size()) { - TRACE_COUNTER("sampling", + TRACE_COUNTER("hardware_counter", perfetto_counter_track::at(_tid, i), _ts, _zero); } @@ -601,10 +601,10 @@ backtrace::post_process(int64_t _tid) auto _ts = _bt->m_ts; if(!pthread_create_gotcha::is_valid_execution_time(_tid, _ts)) continue; - TRACE_EVENT_BEGIN("sampling", + TRACE_EVENT_BEGIN("hardware_counter", perfetto::StaticString{ 
sitr.first->c_str() }, _last_wall_ts, "begin_ns", _last_wall_ts); - TRACE_EVENT_END("sampling", _ts, "end_ns", _ts); + TRACE_EVENT_END("hardware_counter", _ts, "end_ns", _ts); } _last_wall_ts = _bt->m_ts; } diff --git a/projects/rocprofiler-systems/source/lib/omnitrace/library/components/category_region.hpp b/projects/rocprofiler-systems/source/lib/omnitrace/library/components/category_region.hpp index a7967d57d7..eb9505c5ac 100644 --- a/projects/rocprofiler-systems/source/lib/omnitrace/library/components/category_region.hpp +++ b/projects/rocprofiler-systems/source/lib/omnitrace/library/components/category_region.hpp @@ -145,7 +145,9 @@ category_region::audit(const gotcha_data_t& _data, audit::incoming, Args&&... _args) { OMNITRACE_SCOPED_THREAD_STATE(ThreadState::Internal); - start(_data.tool_id.c_str(), "args", JOIN(", ", _args...)); + start(_data.tool_id.c_str(), "args", + JOIN(", ", + JOIN('=', tim::try_demangle>(), _args)...)); } template diff --git a/projects/rocprofiler-systems/source/lib/omnitrace/library/components/fwd.hpp b/projects/rocprofiler-systems/source/lib/omnitrace/library/components/fwd.hpp index 69d1fd5cbe..c39113940f 100644 --- a/projects/rocprofiler-systems/source/lib/omnitrace/library/components/fwd.hpp +++ b/projects/rocprofiler-systems/source/lib/omnitrace/library/components/fwd.hpp @@ -61,8 +61,12 @@ TIMEMORY_DECLARE_TYPE_TRAIT(name, typename Tp) } TIMEMORY_DEFINE_NS_API(category, host) -TIMEMORY_DEFINE_NS_API(category, device) TIMEMORY_DEFINE_NS_API(category, user) +TIMEMORY_DEFINE_NS_API(category, device) +TIMEMORY_DEFINE_NS_API(category, device_hip) +TIMEMORY_DEFINE_NS_API(category, device_hsa) +TIMEMORY_DEFINE_NS_API(category, rocm_hip) +TIMEMORY_DEFINE_NS_API(category, rocm_hsa) TIMEMORY_DEFINE_NS_API(category, rocm_smi) TIMEMORY_DEFINE_NS_API(category, kokkos) TIMEMORY_DEFINE_NS_API(category, mpi) @@ -73,7 +77,11 @@ TIMEMORY_DEFINE_NS_API(category, device_critical_trace) TIMEMORY_DEFINE_NAME_TRAIT("host", category::host); 
TIMEMORY_DEFINE_NAME_TRAIT("device", category::device); +TIMEMORY_DEFINE_NAME_TRAIT("device_hip", category::device_hip); +TIMEMORY_DEFINE_NAME_TRAIT("device_hsa", category::device_hsa); TIMEMORY_DEFINE_NAME_TRAIT("user", category::user); +TIMEMORY_DEFINE_NAME_TRAIT("rocm_hip", category::rocm_hip); +TIMEMORY_DEFINE_NAME_TRAIT("rocm_hsa", category::rocm_hsa); TIMEMORY_DEFINE_NAME_TRAIT("rocm_smi", category::rocm_smi); TIMEMORY_DEFINE_NAME_TRAIT("sampling", category::sampling); TIMEMORY_DEFINE_NAME_TRAIT("thread_sampling", category::thread_sampling); diff --git a/projects/rocprofiler-systems/source/lib/omnitrace/library/components/rocm_smi.cpp b/projects/rocprofiler-systems/source/lib/omnitrace/library/components/rocm_smi.cpp index 31814c17f6..6620d47e53 100644 --- a/projects/rocprofiler-systems/source/lib/omnitrace/library/components/rocm_smi.cpp +++ b/projects/rocprofiler-systems/source/lib/omnitrace/library/components/rocm_smi.cpp @@ -246,10 +246,11 @@ data::post_process(uint32_t _dev_id) double _temp = itr.m_temp / 1.0e3; double _power = itr.m_power / 1.0e6; double _usage = itr.m_mem_usage / static_cast(units::megabyte); - TRACE_COUNTER("rocm_smi", counter_track::at(_dev_id, 0), _ts, _busy); - TRACE_COUNTER("rocm_smi", counter_track::at(_dev_id, 1), _ts, _temp); - TRACE_COUNTER("rocm_smi", counter_track::at(_dev_id, 2), _ts, _power); - TRACE_COUNTER("rocm_smi", counter_track::at(_dev_id, 3), _ts, _usage); + TRACE_COUNTER("device_busy", counter_track::at(_dev_id, 0), _ts, _busy); + TRACE_COUNTER("device_temp", counter_track::at(_dev_id, 1), _ts, _temp); + TRACE_COUNTER("device_power", counter_track::at(_dev_id, 2), _ts, _power); + TRACE_COUNTER("device_memory_usage", counter_track::at(_dev_id, 3), _ts, + _usage); } }; diff --git a/projects/rocprofiler-systems/source/lib/omnitrace/library/components/roctracer_callbacks.cpp b/projects/rocprofiler-systems/source/lib/omnitrace/library/components/roctracer_callbacks.cpp index 7ea557d32b..840e0d504f 100644 --- 
a/projects/rocprofiler-systems/source/lib/omnitrace/library/components/roctracer_callbacks.cpp +++ b/projects/rocprofiler-systems/source/lib/omnitrace/library/components/roctracer_callbacks.cpp @@ -28,6 +28,7 @@ #include "library/runtime.hpp" #include "library/sampling.hpp" #include "library/thread_data.hpp" +#include "library/tracing.hpp" #include #include @@ -38,6 +39,8 @@ #include #include +#define HIP_PROF_HIP_API_STRING 1 + #include #include #include @@ -45,12 +48,36 @@ #define AMD_INTERNAL_BUILD 1 #include +#if __has_include() || (defined(OMNITRACE_USE_HIP) && OMNITRACE_USE_HIP > 0) +# include +# define OMNITRACE_HIP_API_ARGS 1 +#else +# define OMNITRACE_HIP_API_ARGS 0 +#endif + TIMEMORY_DEFINE_API(roctracer) namespace omnitrace { namespace api = tim::api; namespace { +// text between the first '(' and last ')' of hipApiString(id, data); the whole string if no such pair; empty if OMNITRACE_HIP_API_ARGS is 0 +std::string +hip_api_string(hip_api_id_t id, const hip_api_data_t* data) +{ +#if OMNITRACE_HIP_API_ARGS > 0 + std::string _v = hipApiString(id, data); + auto _pbeg = _v.find('('); + if(_pbeg == std::string::npos) return _v; + auto _pend = _v.find_last_of(')'); + if(_pend == std::string::npos || _pbeg >= _pend) return _v; + return _v.substr(_pbeg + 1, _pend - _pbeg - 1); +#else + tim::consume_parameters(id, data); + return std::string{}; +#endif +} +// int64_t get_clock_skew() { @@ -275,11 +302,12 @@ hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* if(get_use_perfetto()) { - TRACE_EVENT_BEGIN("device", perfetto::StaticString{ _name }, - static_cast(begin_timestamp), "begin_ns", - static_cast(begin_timestamp)); - TRACE_EVENT_END("device", static_cast(end_timestamp), - "end_ns", static_cast(end_timestamp)); + uint64_t _beg_ts = begin_timestamp; + uint64_t _end_ts = end_timestamp; + tracing::push_perfetto_ts(category::rocm_hsa{}, _name, _beg_ts, + "begin_ns", _beg_ts); + tracing::pop_perfetto_ts(category::rocm_hsa{}, _name, _end_ts, + "end_ns", _end_ts); } if(get_use_timemory()) @@ -334,6 +362,9 @@ hsa_activity_callback(uint32_t op,
activity_record_t* record, void* arg) default: break; } + OMNITRACE_CI_FAIL(_name == nullptr, "Error! HSA operation type not handled: %u\n", + op); + if(!_name) return; auto _beg_ns = record->begin_ns + get_clock_skew(); @@ -347,11 +378,10 @@ hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg) if(get_use_perfetto()) { - TRACE_EVENT_BEGIN("device", perfetto::StaticString{ *_name }, - static_cast(_beg_ns), "begin_ns", - static_cast(_beg_ns)); - TRACE_EVENT_END("device", static_cast(_end_ns), "end_ns", - static_cast(_end_ns)); + uint64_t _beg = _beg_ns; + uint64_t _end = _end_ns; + tracing::push_perfetto_ts(category::device_hsa{}, *_name, _beg, "begin_ns", _beg); + tracing::pop_perfetto_ts(category::device_hsa{}, *_name, _end, "end_ns", _end); } auto _func = [_beg_ns, _end_ns, _name]() { @@ -582,11 +612,12 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* if(get_use_perfetto()) { - TRACE_EVENT_BEGIN( - "host", perfetto::StaticString{ op_name }, static_cast(_ts), - perfetto::Flow::ProcessScoped(_cid), "begin_ns", - static_cast(_ts), "pcid", _parent_cid, "cid", _cid, "device", - _device_id, "tid", _tid, "depth", _depth, "corr_id", _corr_id); + auto _api_id = static_cast(cid); + tracing::push_perfetto_ts( + category::rocm_hip{}, op_name, _ts, perfetto::Flow::ProcessScoped(_cid), + "begin_ns", static_cast(_ts), "pcid", _parent_cid, "cid", _cid, + "device", _device_id, "tid", _tid, "depth", _depth, "corr_id", _corr_id, + "args", hip_api_string(_api_id, data)); } if(get_use_timemory()) { @@ -622,8 +653,8 @@ hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* if(get_use_perfetto()) { - TRACE_EVENT_END("host", static_cast(_ts), "end_ns", - static_cast(_ts)); + tracing::pop_perfetto_ts(category::rocm_hip{}, op_name, _ts, "end_ns", + static_cast(_ts)); } if(get_use_timemory()) { @@ -779,14 +810,16 @@ hip_activity_callback(const char* begin, const char* end, void*) _kernel_names.emplace(_name, 
tim::demangle(_name)); assert(_end_ns > _beg_ns); - TRACE_EVENT_BEGIN("device", - perfetto::StaticString{ _kernel_names.at(_name).c_str() }, - _beg_ns, perfetto::Flow::ProcessScoped(_cid), "begin_ns", - _beg_ns, "corr_id", record->correlation_id, "device", - _devid, "queue", _queid, "op", _op_id_names.at(record->op)); - TRACE_EVENT_END("device", _end_ns, "end_ns", _end_ns); + tracing::push_perfetto_ts( + category::device_hip{}, _kernel_names.at(_name).c_str(), _beg_ns, + perfetto::Flow::ProcessScoped(_cid), "begin_ns", _beg_ns, "corr_id", + record->correlation_id, "device", _devid, "queue", _queid, "op", + _op_id_names.at(record->op)); + tracing::pop_perfetto_ts(category::device_hip{}, "", _end_ns, "end_ns", + _end_ns); // for some reason, this is necessary to make sure very last one ends - TRACE_EVENT_END("device", _end_ns, "end_ns", _end_ns); + tracing::pop_perfetto_ts(category::device_hip{}, "", _end_ns, "end_ns", + _end_ns); } if(_critical_trace) diff --git a/projects/rocprofiler-systems/source/lib/omnitrace/library/cpu_freq.cpp b/projects/rocprofiler-systems/source/lib/omnitrace/library/cpu_freq.cpp index 5a8cbba242..79bd4d982a 100644 --- a/projects/rocprofiler-systems/source/lib/omnitrace/library/cpu_freq.cpp +++ b/projects/rocprofiler-systems/source/lib/omnitrace/library/cpu_freq.cpp @@ -80,7 +80,24 @@ void init_perfetto_counter_tracks(type_list) (perfetto_counter_track::init(), ...); } } // namespace +} // namespace cpu_freq +} // namespace omnitrace +TIMEMORY_DEFINE_NAME_TRAIT("cpu_freq", omnitrace::cpu_freq::cpu_freq); +TIMEMORY_DEFINE_NAME_TRAIT("process_page_fault", omnitrace::cpu_freq::cpu_page); +TIMEMORY_DEFINE_NAME_TRAIT("process_virtual_memory", omnitrace::cpu_freq::cpu_virt); +TIMEMORY_DEFINE_NAME_TRAIT("process_context_switch", + omnitrace::cpu_freq::cpu_context_switch); +TIMEMORY_DEFINE_NAME_TRAIT("process_page_fault", omnitrace::cpu_freq::cpu_page_fault); +TIMEMORY_DEFINE_NAME_TRAIT("process_user_cpu_time", + 
omnitrace::cpu_freq::cpu_user_mode_time); +TIMEMORY_DEFINE_NAME_TRAIT("process_kernel_cpu_time", + omnitrace::cpu_freq::cpu_kernel_mode_time); + +namespace omnitrace +{ +namespace cpu_freq +{ void setup() { @@ -230,7 +247,7 @@ void write_perfetto_counter_track(Args... _args) { using track = perfetto_counter_track; - TRACE_COUNTER("sampling", track::at(0, 0), _args...); + TRACE_COUNTER(trait::name::value, track::at(0, 0), _args...); } } // namespace @@ -254,11 +271,11 @@ post_process() uint64_t _ts = std::get<0>(itr); double _freq = std::get<7>(itr).at(_offset); if(!pthread_create_gotcha::is_valid_execution_time(0, _ts)) continue; - TRACE_COUNTER("sampling", freq_track::at(_idx, 0), _ts, _freq); + TRACE_COUNTER("cpu_freq", freq_track::at(_idx, 0), _ts, _freq); } auto _end_ts = pthread_create_gotcha::get_execution_time(0)->second; - TRACE_COUNTER("sampling", freq_track::at(_idx, 0), _end_ts, 0); + TRACE_COUNTER("cpu_freq", freq_track::at(_idx, 0), _end_ts, 0); }; auto _process_cpu_rusage = []() { diff --git a/projects/rocprofiler-systems/source/lib/omnitrace/library/perfetto.hpp b/projects/rocprofiler-systems/source/lib/omnitrace/library/perfetto.hpp index 7811f55d34..dac3f9e848 100644 --- a/projects/rocprofiler-systems/source/lib/omnitrace/library/perfetto.hpp +++ b/projects/rocprofiler-systems/source/lib/omnitrace/library/perfetto.hpp @@ -24,56 +24,72 @@ #include "library/defines.hpp" -#if defined(PERFETTO_CATEGORIES) -# error "PERFETTO_CATEGORIES is already defined. Please include \"" __FILE__ "\" before including any timemory files" +#if defined(OMNITRACE_PERFETTO_CATEGORIES) +# error "OMNITRACE_PERFETTO_CATEGORIES is already defined. 
Please include \"" __FILE__ "\" before including any timemory files" #endif -#if !defined(TIMEMORY_USE_PERFETTO) -# include -# define PERFETTO_CATEGORIES \ - perfetto::Category("host").SetDescription("Host-side function tracing"), \ - perfetto::Category("device").SetDescription("Device-side function tracing"), \ - perfetto::Category("user").SetDescription("User-defined regions"), \ - perfetto::Category("rocm_smi").SetDescription("Device-level metrics"), \ - perfetto::Category("sampling") \ - .SetDescription("Metrics derived from sampling"), \ - perfetto::Category("thread_sampling") \ - .SetDescription("Metrics derived from background thread sampling"), \ - perfetto::Category("mpi").SetDescription("MPI regions"), \ - perfetto::Category("kokkos").SetDescription("Kokkos regions"), \ - perfetto::Category("ompt").SetDescription("OpenMP Tools regions"), \ - perfetto::Category("critical-trace") \ - .SetDescription("Combined critical traces"), \ - perfetto::Category("host-critical-trace") \ - .SetDescription("Host-side critical traces"), \ - perfetto::Category("device-critical-trace") \ - .SetDescription("Device-side critical traces") +#define OMNITRACE_PERFETTO_CATEGORIES \ + perfetto::Category("host").SetDescription("Host-side function tracing"), \ + perfetto::Category("user").SetDescription("User-defined regions"), \ + perfetto::Category("device_hip") \ + .SetDescription("Device-side functions submitted via HIP API"), \ + perfetto::Category("device_hsa") \ + .SetDescription("Device-side functions submitted via HSA API"), \ + perfetto::Category("rocm_hip").SetDescription("Host-side HIP functions"), \ + perfetto::Category("rocm_hsa").SetDescription("Host-side HSA functions"), \ + perfetto::Category("device_busy") \ + .SetDescription("Busy percentage of a GPU device"), \ + perfetto::Category("device_temp") \ + .SetDescription("Temperature of GPU device in degC"), \ + perfetto::Category("device_power") \ + .SetDescription("Power consumption of GPU device in watts"), \ +
perfetto::Category("device_memory_usage") \ + .SetDescription("Memory usage of GPU device in MB"), \ + perfetto::Category("thread_peak_memory") \ + .SetDescription( \ + "Peak memory usage on thread in MB (derived from sampling)"), \ + perfetto::Category("thread_context_switch") \ + .SetDescription("Context switches on thread (derived from sampling)"), \ + perfetto::Category("thread_page_fault") \ + .SetDescription("Memory page faults on thread (derived from sampling)"), \ + perfetto::Category("hardware_counter") \ + .SetDescription("Hardware counter value on thread (derived from sampling)"), \ + perfetto::Category("cpu_freq") \ + .SetDescription("CPU frequency in MHz (collected in background thread)"), \ + perfetto::Category("process_page_fault") \ + .SetDescription( \ + "Memory page faults in process (collected in background thread)"), \ + perfetto::Category("process_virtual_memory") \ + .SetDescription("Virtual memory usage in process in MB (collected in " \ + "background thread)"), \ + perfetto::Category("process_context_switch") \ + .SetDescription( \ + "Context switches in process (collected in background thread)"), \ + perfetto::Category("process_page_fault") \ + .SetDescription( \ + "Memory page faults in process (collected in background thread)"), \ + perfetto::Category("process_user_cpu_time") \ + .SetDescription("CPU time of functions executing in user-space in process " \ + "in seconds (collected in background thread)"), \ + perfetto::Category("process_kernel_cpu_time") \ + .SetDescription("CPU time of functions executing in kernel-space in " \ + "process in seconds (collected in background thread)"), \ + perfetto::Category("mpi").SetDescription("MPI regions"), \ + perfetto::Category("kokkos").SetDescription("Kokkos regions"), \ + perfetto::Category("ompt").SetDescription("OpenMP Tools regions"), \ + perfetto::Category("critical-trace").SetDescription("Combined critical traces"), \ + perfetto::Category("host-critical-trace") \ + 
.SetDescription("Host-side critical traces"), \ + perfetto::Category("device-critical-trace") \ + .SetDescription("Device-side critical traces"), \ + perfetto::Category("timemory").SetDescription("Events from the timemory API") + +#if defined(TIMEMORY_USE_PERFETTO) +# define TIMEMORY_PERFETTO_CATEGORIES OMNITRACE_PERFETTO_CATEGORIES +# include #else -# define PERFETTO_CATEGORIES \ - perfetto::Category("host").SetDescription("Host-side function tracing"), \ - perfetto::Category("device").SetDescription("Device-side function tracing"), \ - perfetto::Category("user").SetDescription("User-defined regions"), \ - perfetto::Category("rocm_smi").SetDescription("Device-level metrics"), \ - perfetto::Category("sampling") \ - .SetDescription("Metrics derived from sampling"), \ - perfetto::Category("thread_sampling") \ - .SetDescription("Metrics derived from background thread sampling"), \ - perfetto::Category("mpi").SetDescription("MPI regions"), \ - perfetto::Category("kokkos").SetDescription("Kokkos regions"), \ - perfetto::Category("ompt").SetDescription("OpenMP Tools regions"), \ - perfetto::Category("critical-trace") \ - .SetDescription("Combined critical traces"), \ - perfetto::Category("host-critical-trace") \ - .SetDescription("Host-side critical traces"), \ - perfetto::Category("device-critical-trace") \ - .SetDescription("Device-side critical traces"), \ - perfetto::Category("timemory") \ - .SetDescription("Events from the timemory API") -# define TIMEMORY_PERFETTO_CATEGORIES PERFETTO_CATEGORIES -#endif - -#if !defined(TIMEMORY_USE_PERFETTO) -PERFETTO_DEFINE_CATEGORIES(PERFETTO_CATEGORIES); +# include +PERFETTO_DEFINE_CATEGORIES(OMNITRACE_PERFETTO_CATEGORIES); #endif #include "library/debug.hpp" diff --git a/projects/rocprofiler-systems/tests/CMakeLists.txt b/projects/rocprofiler-systems/tests/CMakeLists.txt index 36d1a32051..6d250b8fe9 100644 --- a/projects/rocprofiler-systems/tests/CMakeLists.txt +++ b/projects/rocprofiler-systems/tests/CMakeLists.txt @@ -1018,8 
+1018,9 @@ foreach(_VERSION ${OMNITRACE_PYTHON_VERSIONS}) endif() function(OMNITRACE_ADD_PYTHON_VALIDATION_TEST) - cmake_parse_arguments(TEST "" "NAME;TIMEMORY_METRIC;TIMEMORY_FILE;PERFETTO_FILE" - "ARGS" ${ARGN}) + cmake_parse_arguments( + TEST "" "NAME;TIMEMORY_METRIC;TIMEMORY_FILE;PERFETTO_METRIC;PERFETTO_FILE" + "ARGS" ${ARGN}) omnitrace_add_python_test( NAME ${TEST_NAME}-validate-timemory @@ -1037,7 +1038,7 @@ foreach(_VERSION ${OMNITRACE_PYTHON_VERSIONS}) NAME ${TEST_NAME}-validate-perfetto COMMAND ${_PYTHON_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/validate-perfetto-proto.py - ${TEST_ARGS} -p -i + -m ${TEST_PERFETTO_METRIC} ${TEST_ARGS} -p -i PYTHON_VERSION ${_VERSION} FILE omnitrace-tests-output/${TEST_NAME}/${_VERSION}/${TEST_PERFETTO_FILE} DEPENDS ${TEST_NAME}-${_VERSION} @@ -1050,6 +1051,7 @@ foreach(_VERSION ${OMNITRACE_PYTHON_VERSIONS}) NAME python-source TIMEMORY_METRIC "trip_count" TIMEMORY_FILE "trip_count.json" + PERFETTO_METRIC "host;user" PERFETTO_FILE "perfetto-trace.proto" ARGS -l run diff --git a/projects/rocprofiler-systems/tests/validate-perfetto-proto.py b/projects/rocprofiler-systems/tests/validate-perfetto-proto.py index 3077de74d5..a1d90fa6c5 100755 --- a/projects/rocprofiler-systems/tests/validate-perfetto-proto.py +++ b/projects/rocprofiler-systems/tests/validate-perfetto-proto.py @@ -40,6 +40,9 @@ if __name__ == "__main__": parser.add_argument( "-d", "--depths", nargs="+", type=int, help="Expected depths", default=[] ) + parser.add_argument( + "-m", "--categories", nargs="+", help="Perfetto categories", default=[] + ) parser.add_argument( "-p", "--print", action="store_true", help="Print the processed perfetto data" ) @@ -55,9 +58,11 @@ if __name__ == "__main__": tp = TraceProcessor(trace=(args.input)) pdata = {} # get data from perfetto - qr_it = tp.query("SELECT name, depth FROM slice") + qr_it = tp.query("SELECT name, depth, category FROM slice") # loop over data rows from perfetto for row in qr_it: + if args.categories and 
row.category not in args.categories: + continue if row.name not in pdata: pdata[row.name] = {} if row.depth not in pdata[row.name]: