2c4e20b951
* [SWDEV-516561][1/2] Add MARKER_RANGE_EXTENT to capture ROCTX ranges
Range extent to capture all work between roctxpush/pop operations. The entry callback takes place during roctxpush and the exit callback takes place in roctxpop. This is primarily to allow us to keep an ancestor id on the ancestor stack such that all operations that take place within the push/pop context can be annotated as being a part of this range. With the current setup (where push and pop are two separate operations that need to be combined externally), we cannot keep an ancestor id on the stack and thus cannot tie tracing events to particular ranges.
Correlation id information is inherited from the push operation. Ancestor id needs to be added in a future commit that also outputs this ancestor to CSV.
Output:
```
[ctest] {'size': 64, 'kind': 7, 'operation': 1, 'correlation_id': {'internal': 1525, 'external': 0, 'ancestor': 1524}, 'start_timestamp': 2932551479402642, 'end_timestamp': 2932551491178449, 'thread_id': 3254861}
[ctest] {'size': 64, 'kind': 8, 'operation': 2, 'correlation_id': {'internal': 1525, 'external': 0, 'ancestor': 1524}, 'start_timestamp': 2932551479405878, 'end_timestamp': 2932551491181214, 'thread_id': 3254861}
```
Note: Kind 8 = range extent op.
* Merge fix
Revert several changes
source/lib/rocprofiler-sdk/marker/range_marker.*
- separate out range marker implementation for standard marker implementation
Update public API with marker core range
Support marker core range in sdk (source/lib/rocprofiler-sdk)
Transition rocprofiler-sdk-tool and output lib to use marker core range
Misc fixes for tests
Fix logic in lib/output/generate{CSV,Stats}.cpp
Update tests/rocprofv3/tracing-hip-in-libraries (marker validation)
Fix test_otf2_data
* Test fixes
---------
Co-authored-by: Benjamin Welton <bewelton@amd.com>
3077 γραμμές
124 KiB
C++
3077 γραμμές
124 KiB
C++
// MIT License
|
|
//
|
|
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in all
|
|
// copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
// SOFTWARE.
|
|
|
|
#define _GNU_SOURCE 1
|
|
#define _DEFAULT_SOURCE 1
|
|
|
|
#include "config.hpp"
|
|
#include "execution_profile.hpp"
|
|
#include "helper.hpp"
|
|
#include "stream_stack.hpp"
|
|
|
|
#include "lib/att-tool/att_lib_wrapper.hpp"
|
|
#include "lib/common/environment.hpp"
|
|
#include "lib/common/filesystem.hpp"
|
|
#include "lib/common/logging.hpp"
|
|
#include "lib/common/scope_destructor.hpp"
|
|
#include "lib/common/simple_timer.hpp"
|
|
#include "lib/common/static_object.hpp"
|
|
#include "lib/common/static_tl_object.hpp"
|
|
#include "lib/common/string_entry.hpp"
|
|
#include "lib/common/synchronized.hpp"
|
|
#include "lib/common/units.hpp"
|
|
#include "lib/common/utility.hpp"
|
|
#include "lib/output/buffered_output.hpp"
|
|
#include "lib/output/counter_info.hpp"
|
|
#include "lib/output/csv.hpp"
|
|
#include "lib/output/csv_output_file.hpp"
|
|
#include "lib/output/domain_type.hpp"
|
|
#include "lib/output/generateCSV.hpp"
|
|
#include "lib/output/generateJSON.hpp"
|
|
#include "lib/output/generateOTF2.hpp"
|
|
#include "lib/output/generatePerfetto.hpp"
|
|
#include "lib/output/generateRocpd.hpp"
|
|
#include "lib/output/generateStats.hpp"
|
|
#include "lib/output/metadata.hpp"
|
|
#include "lib/output/output_stream.hpp"
|
|
#include "lib/output/statistics.hpp"
|
|
#include "lib/output/stream_info.hpp"
|
|
#include "lib/output/timestamps.hpp"
|
|
#include "lib/output/tmp_file.hpp"
|
|
#include "lib/output/tmp_file_buffer.hpp"
|
|
|
|
#include <rocprofiler-sdk/agent.h>
|
|
#include <rocprofiler-sdk/buffer_tracing.h>
|
|
#include <rocprofiler-sdk/callback_tracing.h>
|
|
#include <rocprofiler-sdk/defines.h>
|
|
#include <rocprofiler-sdk/dispatch_counting_service.h>
|
|
#include <rocprofiler-sdk/experimental/counters.h>
|
|
#include <rocprofiler-sdk/experimental/thread_trace.h>
|
|
#include <rocprofiler-sdk/external_correlation.h>
|
|
#include <rocprofiler-sdk/fwd.h>
|
|
#include <rocprofiler-sdk/intercept_table.h>
|
|
#include <rocprofiler-sdk/internal_threading.h>
|
|
#include <rocprofiler-sdk/marker/api_id.h>
|
|
#include <rocprofiler-sdk/rocprofiler.h>
|
|
#include <rocprofiler-sdk/version.h>
|
|
#include <rocprofiler-sdk/cxx/hash.hpp>
|
|
#include <rocprofiler-sdk/cxx/operators.hpp>
|
|
|
|
#include <fmt/core.h>
|
|
|
|
#include <time.h>
|
|
#include <unistd.h>
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <chrono>
|
|
#include <csignal>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <fstream>
|
|
#include <future>
|
|
#include <iomanip>
|
|
#include <limits>
|
|
#include <mutex>
|
|
#include <optional>
|
|
#include <shared_mutex>
|
|
#include <thread>
|
|
#include <type_traits>
|
|
#include <unordered_map>
|
|
#include <unordered_set>
|
|
#include <vector>
|
|
|
|
#include <dlfcn.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/types.h>
|
|
#include <sys/wait.h>
|
|
|
|
#if defined(CODECOV) && CODECOV > 0
// Declaration of __gcov_dump with C linkage; only visible when CODECOV is
// defined to a value greater than zero.
extern "C" void
__gcov_dump(void);
#endif
|
|
|
|
namespace common = ::rocprofiler::common;
|
|
namespace tool = ::rocprofiler::tool;
|
|
|
|
extern "C" {
|
|
void
|
|
rocprofv3_error_signal_handler(int signo, siginfo_t*, void*);
|
|
}
|
|
|
|
namespace
|
|
{
|
|
// Alias for the POSIX `struct sigaction` type.
using sigaction_t = struct sigaction;

// Pointer to a function with the same signature as signal().
using signal_func_t = sighandler_t (*)(int signum, sighandler_t handler);

// Pointer to a function with the same signature as sigaction().
using sigaction_func_t = int (*)(int signum,
                                 const struct sigaction* __restrict__ act,
                                 struct sigaction* __restrict__ oldact);

// Number of signal slots (NSIG) used to size per-signal tables.
constexpr auto rocprofv3_num_signals = NSIG;

// Signals matched by is_handled_signal().
constexpr std::array<int, 4> rocprofv3_handled_signals = {SIGINT, SIGQUIT, SIGABRT, SIGTERM};
|
|
|
|
// Heap-allocated list of cleanup callbacks. add_destructor() appends to it while
// holding the lock returned by get_destructors_lock().
std::vector<std::function<void()>>* destructors = new std::vector<std::function<void()>>{};
|
|
|
|
// Returns a reference to the object behind `ptr`. The pointer is passed through
// CHECK_NOTNULL before it is dereferenced.
template <typename Tp>
Tp&
get_dereference(Tp* ptr)
{
    auto* _checked = CHECK_NOTNULL(ptr);
    return *_checked;
}
|
|
|
|
// Locks the function-local mutex guarding `destructors` and returns the owning
// std::unique_lock; the mutex is released when the returned lock is destroyed.
auto
get_destructors_lock()
{
    static std::mutex _destructors_mutex;
    return std::unique_lock<std::mutex>{_destructors_mutex};
}
|
|
|
|
template <typename Tp>
|
|
Tp*&
|
|
add_destructor(Tp*& ptr)
|
|
{
|
|
auto _lk = get_destructors_lock();
|
|
destructors->emplace_back([&ptr]() {
|
|
delete ptr;
|
|
ptr = nullptr;
|
|
});
|
|
return ptr;
|
|
}
|
|
|
|
struct chained_siginfo
|
|
{
|
|
int signo = 0;
|
|
sighandler_t handler = nullptr;
|
|
std::optional<sigaction_t> action = {};
|
|
};
|
|
|
|
auto&
|
|
get_chained_signals()
|
|
{
|
|
using data_type = std::array<std::optional<chained_siginfo>, rocprofv3_num_signals>;
|
|
static auto*& _v = common::static_object<data_type>::construct();
|
|
return *CHECK_NOTNULL(_v);
|
|
}
|
|
|
|
bool
|
|
is_handled_signal(int signum)
|
|
{
|
|
for(auto itr : rocprofv3_handled_signals)
|
|
if(itr == signum) return true;
|
|
return false;
|
|
}
|
|
|
|
struct buffer_ids
|
|
{
|
|
rocprofiler_buffer_id_t hsa_api_trace = {};
|
|
rocprofiler_buffer_id_t hip_api_trace = {};
|
|
rocprofiler_buffer_id_t kernel_trace = {};
|
|
rocprofiler_buffer_id_t memory_copy_trace = {};
|
|
rocprofiler_buffer_id_t memory_allocation_trace = {};
|
|
rocprofiler_buffer_id_t counter_collection = {};
|
|
rocprofiler_buffer_id_t scratch_memory = {};
|
|
rocprofiler_buffer_id_t rccl_api_trace = {};
|
|
rocprofiler_buffer_id_t pc_sampling_host_trap = {};
|
|
rocprofiler_buffer_id_t rocdecode_api_trace = {};
|
|
rocprofiler_buffer_id_t rocjpeg_api_trace = {};
|
|
rocprofiler_buffer_id_t pc_sampling_stochastic = {};
|
|
|
|
auto as_array() const
|
|
{
|
|
return std::array<rocprofiler_buffer_id_t, 12>{hsa_api_trace,
|
|
hip_api_trace,
|
|
kernel_trace,
|
|
memory_copy_trace,
|
|
memory_allocation_trace,
|
|
counter_collection,
|
|
scratch_memory,
|
|
rccl_api_trace,
|
|
pc_sampling_host_trap,
|
|
rocdecode_api_trace,
|
|
rocjpeg_api_trace,
|
|
pc_sampling_stochastic};
|
|
}
|
|
auto pc_sampling_buffers_as_array() const
|
|
{
|
|
return std::array<rocprofiler_buffer_id_t, 2>{pc_sampling_host_trap,
|
|
pc_sampling_stochastic};
|
|
}
|
|
};
|
|
|
|
// Accessor for the single, function-local static buffer_ids instance.
buffer_ids&
get_buffers()
{
    static buffer_ids _instance{};
    return _instance;
}
|
|
|
|
// Heap-allocates a Tp that is brace-initialized from `_val` (forwarded).
// The caller owns the returned pointer.
template <typename Tp>
Tp*
as_pointer(Tp&& _val)
{
    auto* _obj = new Tp{std::forward<Tp>(_val)};
    return _obj;
}

// Heap-allocates a Tp that is brace-initialized from the forwarded arguments.
// Because braces are used, initializer-list constructors take precedence.
// The caller owns the returned pointer.
template <typename Tp, typename... Args>
Tp*
as_pointer(Args&&... _args)
{
    auto* _obj = new Tp{std::forward<Args>(_args)...};
    return _obj;
}

// Heap-allocates a value-initialized Tp. The caller owns the returned pointer.
template <typename Tp>
Tp*
as_pointer()
{
    auto* _obj = new Tp{};
    return _obj;
}
|
|
|
|
using targeted_kernels_map_t =
|
|
std::unordered_map<rocprofiler_kernel_id_t, std::unordered_set<size_t>>;
|
|
using counter_dimension_info_map_t =
|
|
std::unordered_map<uint64_t, std::vector<rocprofiler_counter_record_dimension_info_t>>;
|
|
using agent_info_map_t = std::unordered_map<rocprofiler_agent_id_t, rocprofiler_agent_t>;
|
|
using kernel_iteration_t = std::unordered_map<rocprofiler_kernel_id_t, size_t>;
|
|
using kernel_rename_map_t = std::unordered_map<uint64_t, uint64_t>;
|
|
using kernel_rename_stack_t = std::stack<uint64_t>;
|
|
|
|
auto* tool_metadata = as_pointer<tool::metadata>(tool::metadata::inprocess{});
|
|
auto target_kernels = common::Synchronized<targeted_kernels_map_t>{};
|
|
auto* execution_profile = as_pointer<common::Synchronized<tool::execution_profile_data>>();
|
|
auto counter_collection_ctx = rocprofiler_context_id_t{0};
|
|
std::mutex att_shader_data;
|
|
|
|
thread_local auto thread_dispatch_rename = as_pointer<kernel_rename_stack_t>();
|
|
thread_local auto thread_dispatch_rename_dtor = common::scope_destructor{[]() {
|
|
delete thread_dispatch_rename;
|
|
thread_dispatch_rename = nullptr;
|
|
}};
|
|
|
|
// Stores stream ids and kernel region ids for kernel-rename service and hip stream display service
|
|
struct kernel_rename_and_stream_data
|
|
{
|
|
uint64_t region_id = 0; // roctx region correlation id
|
|
rocprofiler_stream_id_t stream_id = {.handle = 0};
|
|
};
|
|
|
|
bool
|
|
add_kernel_target(uint64_t _kern_id, const std::unordered_set<size_t>& range)
|
|
{
|
|
return target_kernels
|
|
.wlock(
|
|
[](targeted_kernels_map_t& _targets_v,
|
|
uint64_t _kern_id_v,
|
|
const std::unordered_set<size_t>& _range) {
|
|
return _targets_v.emplace(_kern_id_v, _range);
|
|
},
|
|
_kern_id,
|
|
range)
|
|
.second;
|
|
}
|
|
|
|
bool
|
|
is_targeted_kernel(uint64_t _kern_id,
|
|
common::Synchronized<kernel_iteration_t, true>& _kernel_iteration)
|
|
{
|
|
const std::unordered_set<size_t>* range = target_kernels.rlock(
|
|
[](const auto& _targets_v, uint64_t _kern_id_v) -> const std::unordered_set<size_t>* {
|
|
if(_targets_v.find(_kern_id_v) != _targets_v.end()) return &_targets_v.at(_kern_id_v);
|
|
return nullptr;
|
|
},
|
|
_kern_id);
|
|
|
|
if(range)
|
|
{
|
|
_kernel_iteration.wlock(
|
|
[](auto& _kernel_iter, rocprofiler_kernel_id_t _kernel_id) {
|
|
auto itr = _kernel_iter.find(_kernel_id);
|
|
if(itr == _kernel_iter.end())
|
|
_kernel_iter.emplace(_kernel_id, 1);
|
|
else
|
|
itr->second++;
|
|
},
|
|
_kern_id);
|
|
|
|
return _kernel_iteration.rlock(
|
|
[](const auto& _kernel_iter,
|
|
uint64_t _kernel_id,
|
|
const std::unordered_set<size_t>& _range) {
|
|
auto itr = _kernel_iter.at(_kernel_id);
|
|
// If the iteration range is not given then all iterations of the kernel is profiled
|
|
if(_range.empty())
|
|
{
|
|
if(!tool::get_config().advanced_thread_trace)
|
|
return true;
|
|
else if(itr == 1)
|
|
return true;
|
|
}
|
|
else if(_range.find(itr) != _range.end())
|
|
return true;
|
|
return false;
|
|
},
|
|
_kern_id,
|
|
*range);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
auto&
|
|
get_client_ctx()
|
|
{
|
|
static rocprofiler_context_id_t context_id{0};
|
|
return context_id;
|
|
}
|
|
|
|
void
|
|
flush()
|
|
{
|
|
constexpr auto null_buffer_id = rocprofiler_buffer_id_t{.handle = 0};
|
|
|
|
ROCP_INFO << "flushing buffers...";
|
|
for(auto itr : get_buffers().as_array())
|
|
{
|
|
if(itr > null_buffer_id)
|
|
{
|
|
ROCP_INFO << "flushing buffer " << itr.handle;
|
|
ROCPROFILER_CALL(rocprofiler_flush_buffer(itr), "buffer flush");
|
|
}
|
|
}
|
|
ROCP_INFO << "Buffers flushed";
|
|
}
|
|
|
|
void
|
|
collection_period_cntrl(std::promise<void>&& _promise, rocprofiler_context_id_t _ctx)
|
|
{
|
|
bool testing_cp = tool::get_env("ROCPROF_COLLECTION_PERIOD_TESTING", false);
|
|
auto log_fname = get_output_filename(tool::get_config(), "collection_periods", "log");
|
|
auto output_testing_file = std::ofstream{};
|
|
|
|
if(testing_cp)
|
|
{
|
|
ROCP_INFO << "collection period test logging enabled: " << log_fname;
|
|
output_testing_file.open(log_fname);
|
|
}
|
|
|
|
auto log_period = [testing_cp, &output_testing_file](
|
|
std::string_view label, auto _func, auto... _args) {
|
|
ROCP_INFO << "collection period: " << label;
|
|
|
|
auto beg = rocprofiler_timestamp_t{};
|
|
if(testing_cp)
|
|
{
|
|
rocprofiler_get_timestamp(&beg);
|
|
}
|
|
|
|
_func(_args...);
|
|
|
|
if(testing_cp)
|
|
{
|
|
auto end = rocprofiler_timestamp_t{};
|
|
rocprofiler_get_timestamp(&end);
|
|
output_testing_file << label << ":" << beg << ":" << end << '\n' << std::flush;
|
|
}
|
|
};
|
|
|
|
auto sleep_for_nsec = [](auto _value) {
|
|
if(_value > 0)
|
|
{
|
|
std::this_thread::yield();
|
|
std::this_thread::sleep_for(std::chrono::nanoseconds{_value});
|
|
}
|
|
};
|
|
|
|
auto periods = tool::get_config().collection_periods;
|
|
_promise.set_value(); // allow the launching thread to proceed
|
|
while(!periods.empty())
|
|
{
|
|
auto _period = periods.front();
|
|
periods.pop();
|
|
|
|
auto execute_period = [&]() {
|
|
if(testing_cp) output_testing_file << "--" << '\n';
|
|
|
|
log_period("delay", sleep_for_nsec, _period.delay);
|
|
log_period("start", rocprofiler_start_context, _ctx);
|
|
log_period("duration", sleep_for_nsec, _period.duration);
|
|
log_period("stop", rocprofiler_stop_context, _ctx);
|
|
};
|
|
|
|
if(_period.repeat == 0)
|
|
{
|
|
execute_period();
|
|
}
|
|
else
|
|
{
|
|
for(size_t i = 0; i < _period.repeat; ++i)
|
|
{
|
|
execute_period();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
int
|
|
record_execution_profile(rocprofiler_thread_id_t thr_id,
|
|
rocprofiler_context_id_t ctx_id,
|
|
rocprofiler_external_correlation_id_request_kind_t kind,
|
|
rocprofiler_tracing_operation_t op,
|
|
uint64_t /*internal_corr_id*/,
|
|
rocprofiler_user_data_t* /*external_corr_id*/,
|
|
void* /*user_data*/)
|
|
{
|
|
auto _record_data = [](tool::execution_profile_data& _data,
|
|
rocprofiler_thread_id_t _thr_id,
|
|
rocprofiler_context_id_t _ctx_id,
|
|
rocprofiler_external_correlation_id_request_kind_t _kind,
|
|
rocprofiler_tracing_operation_t _op) {
|
|
_data.category_count[_kind] += 1;
|
|
_data.category_op_count[_kind].emplace(_op);
|
|
_data.threads.emplace(_thr_id);
|
|
_data.contexts.emplace(_ctx_id);
|
|
};
|
|
|
|
if(execution_profile)
|
|
execution_profile->wlock(std::move(_record_data), thr_id, ctx_id, kind, op);
|
|
|
|
return 0;
|
|
}
|
|
|
|
template <typename Tp>
|
|
rocprofiler_stream_id_t
|
|
get_stream_id(Tp* _record)
|
|
{
|
|
auto _stream_id = rocprofiler_stream_id_t{.handle = 0};
|
|
if(_record->correlation_id.external.ptr != nullptr)
|
|
{
|
|
// Extract the stream id
|
|
auto* _ecid_data =
|
|
static_cast<kernel_rename_and_stream_data*>(_record->correlation_id.external.ptr);
|
|
_stream_id = _ecid_data->stream_id;
|
|
auto _region_id = _ecid_data->region_id;
|
|
_record->correlation_id.external.value = _region_id;
|
|
delete _ecid_data;
|
|
}
|
|
return _stream_id;
|
|
}
|
|
|
|
int
|
|
set_kernel_rename_and_stream_correlation_id(rocprofiler_thread_id_t thr_id,
|
|
rocprofiler_context_id_t ctx_id,
|
|
rocprofiler_external_correlation_id_request_kind_t kind,
|
|
rocprofiler_tracing_operation_t op,
|
|
uint64_t internal_corr_id,
|
|
rocprofiler_user_data_t* external_corr_id,
|
|
void* user_data)
|
|
{
|
|
// Check whether services are enabled
|
|
const bool kernel_rename_service_enabled =
|
|
kind == ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH &&
|
|
tool::get_config().kernel_rename && thread_dispatch_rename != nullptr &&
|
|
!thread_dispatch_rename->empty();
|
|
|
|
const bool hip_stream_enabled =
|
|
!tool::get_config().group_by_queue && rocprofiler::tool::stream::stream_stack_not_null();
|
|
|
|
if(!kernel_rename_service_enabled && !hip_stream_enabled) return 1;
|
|
|
|
auto* _info = new kernel_rename_and_stream_data{};
|
|
|
|
// Get value for kernel rename service
|
|
if(kernel_rename_service_enabled)
|
|
{
|
|
_info->region_id = thread_dispatch_rename->top();
|
|
if(tool_metadata) tool_metadata->add_external_correlation_id(_info->region_id);
|
|
}
|
|
|
|
// Get stream ID from stream HIP display service
|
|
if(hip_stream_enabled)
|
|
{
|
|
_info->stream_id = rocprofiler::tool::stream::get_stream_id();
|
|
}
|
|
|
|
// Set the external correlation id service to point to struct
|
|
external_corr_id->ptr = _info;
|
|
|
|
common::consume_args(thr_id, ctx_id, kind, op, internal_corr_id, user_data);
|
|
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
cntrl_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
|
rocprofiler_user_data_t* user_data,
|
|
void* cb_data)
|
|
{
|
|
auto* ctx = static_cast<rocprofiler_context_id_t*>(cb_data);
|
|
|
|
if(ctx && record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API)
|
|
{
|
|
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER &&
|
|
record.operation == ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerPause)
|
|
{
|
|
ROCPROFILER_CALL(rocprofiler_stop_context(*ctx), "pausing context");
|
|
}
|
|
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT &&
|
|
record.operation == ROCPROFILER_MARKER_CONTROL_API_ID_roctxProfilerResume)
|
|
{
|
|
ROCPROFILER_CALL(rocprofiler_start_context(*ctx), "resuming context");
|
|
}
|
|
|
|
auto ts = rocprofiler_timestamp_t{};
|
|
rocprofiler_get_timestamp(&ts);
|
|
|
|
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
|
|
{
|
|
user_data->value = ts;
|
|
}
|
|
else
|
|
{
|
|
auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{};
|
|
marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t);
|
|
marker_record.kind = convert_marker_tracing_kind(record.kind);
|
|
marker_record.operation = record.operation;
|
|
marker_record.thread_id = record.thread_id;
|
|
marker_record.correlation_id = record.correlation_id;
|
|
marker_record.start_timestamp = user_data->value;
|
|
marker_record.end_timestamp = ts;
|
|
tool::write_ring_buffer(marker_record, domain_type::MARKER);
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
kernel_rename_callback(rocprofiler_callback_tracing_record_t record,
|
|
rocprofiler_user_data_t* user_data,
|
|
void* data)
|
|
{
|
|
if(!tool::get_config().kernel_rename || thread_dispatch_rename == nullptr) return;
|
|
|
|
if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API)
|
|
{
|
|
auto* marker_data =
|
|
static_cast<rocprofiler_callback_tracing_marker_api_data_t*>(record.payload);
|
|
auto add_message = [](std::string_view val) {
|
|
auto _hash_v = common::add_string_entry(val);
|
|
return std::string_view{*common::get_string_entry(_hash_v)};
|
|
};
|
|
|
|
if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA &&
|
|
record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT && marker_data->args.roctxMarkA.message)
|
|
{
|
|
thread_dispatch_rename->emplace(tool_metadata->add_kernel_rename_val(
|
|
add_message(marker_data->args.roctxMarkA.message), record.correlation_id.internal));
|
|
}
|
|
else if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA &&
|
|
record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER &&
|
|
marker_data->args.roctxThreadRangeA.message)
|
|
{
|
|
thread_dispatch_rename->emplace(tool_metadata->add_kernel_rename_val(
|
|
add_message(marker_data->args.roctxThreadRangeA.message),
|
|
record.correlation_id.internal));
|
|
}
|
|
else if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA &&
|
|
record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
|
|
{
|
|
ROCP_FATAL_IF(thread_dispatch_rename->empty())
|
|
<< "roctxRangePop invoked more times than roctxRangePush on thread "
|
|
<< rocprofiler::common::get_tid();
|
|
|
|
thread_dispatch_rename->pop();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ROCP_CI_LOG(INFO) << fmt::format(
|
|
"Unsupported operation for {}",
|
|
tool_metadata->get_operation_name(record.kind, record.operation));
|
|
}
|
|
|
|
common::consume_args(user_data, data);
|
|
}
|
|
|
|
// Stores stream IDs onto stack when callback is triggered
|
|
void
|
|
hip_stream_display_callback(rocprofiler_callback_tracing_record_t record,
|
|
rocprofiler_user_data_t* user_data,
|
|
void* data)
|
|
{
|
|
if(tool::get_config().group_by_queue || record.kind != ROCPROFILER_CALLBACK_TRACING_HIP_STREAM)
|
|
return;
|
|
// Extract stream ID from record
|
|
auto* stream_handle_data =
|
|
static_cast<rocprofiler_callback_tracing_hip_stream_data_t*>(record.payload);
|
|
auto stream_id = stream_handle_data->stream_id;
|
|
|
|
// STREAM_HANDLE_CREATE and DESTROY are no-ops
|
|
if(record.operation == ROCPROFILER_HIP_STREAM_CREATE)
|
|
{
|
|
ROCP_TRACE
|
|
<< "Entered hip_stream_display_callback function for ROCPROFILER_HIP_STREAM_CREATE";
|
|
}
|
|
else if(record.operation == ROCPROFILER_HIP_STREAM_DESTROY)
|
|
{
|
|
ROCP_TRACE
|
|
<< "Entered hip_stream_display_callback function for ROCPROFILER_HIP_STREAM_DESTROY";
|
|
}
|
|
else if(record.operation == ROCPROFILER_HIP_STREAM_SET)
|
|
{
|
|
// Push the stream ID onto the stream stack when before underlying HIP function is called
|
|
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
|
|
{
|
|
ROCP_TRACE << "Entered hip_stream_display_callback function for "
|
|
"ROCPROFILER_HIP_STREAM_SET with ROCPROFILER_CALLBACK_PHASE_ENTER";
|
|
rocprofiler::tool::stream::push_stream_id(stream_id);
|
|
}
|
|
// Pop stream ID off of stream stack after underlying HIP function is completed
|
|
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
|
|
{
|
|
ROCP_TRACE << "Entered hip_stream_display_callback function for "
|
|
"ROCPROFILER_HIP_STREAM_SET with ROCPROFILER_CALLBACK_PHASE_EXIT";
|
|
rocprofiler::tool::stream::pop_stream_id();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ROCP_FATAL << "Unsupported operation for ROCPROFILER_HIP_STREAM";
|
|
}
|
|
common::consume_args(user_data, data);
|
|
}
|
|
|
|
// Stores which runtimes have been initialized in metadata
|
|
void
|
|
runtime_initialization_callback(rocprofiler_callback_tracing_record_t record,
|
|
rocprofiler_user_data_t* user_data,
|
|
void* data)
|
|
{
|
|
if(record.kind != ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION) return;
|
|
ROCP_CI_LOG_IF(WARNING, tool_metadata == nullptr)
|
|
<< fmt::format("tool cannot record runtime initialization for {}",
|
|
tool_metadata->get_operation_name(record.kind, record.operation));
|
|
if(tool_metadata)
|
|
{
|
|
tool_metadata->add_runtime_initialization(
|
|
static_cast<rocprofiler_runtime_initialization_operation_t>(record.operation));
|
|
}
|
|
common::consume_args(user_data, data);
|
|
}
|
|
|
|
void
|
|
dummy_callback_tracing_callback(rocprofiler_callback_tracing_record_t /*record*/,
|
|
rocprofiler_user_data_t* /*user_data*/,
|
|
void* /*data*/)
|
|
{}
|
|
|
|
void
|
|
dummy_counter_dispatch_callback(rocprofiler_dispatch_counting_service_data_t,
|
|
rocprofiler_profile_config_id_t*,
|
|
rocprofiler_user_data_t*,
|
|
void*)
|
|
{}
|
|
|
|
void
|
|
dummy_counter_record_callback(rocprofiler_dispatch_counting_service_data_t,
|
|
rocprofiler_record_counter_t*,
|
|
size_t,
|
|
rocprofiler_user_data_t,
|
|
void*)
|
|
{}
|
|
|
|
void
|
|
callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
|
rocprofiler_user_data_t* user_data,
|
|
void* data)
|
|
{
|
|
if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API)
|
|
{
|
|
auto* marker_data =
|
|
static_cast<rocprofiler_callback_tracing_marker_api_data_t*>(record.payload);
|
|
|
|
auto ts = rocprofiler_timestamp_t{};
|
|
rocprofiler_get_timestamp(&ts);
|
|
|
|
if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA)
|
|
{
|
|
if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
|
|
{
|
|
CHECK_NOTNULL(tool_metadata)
|
|
->add_marker_message(record.correlation_id.internal,
|
|
std::string{marker_data->args.roctxMarkA.message});
|
|
|
|
auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{};
|
|
marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t);
|
|
marker_record.kind = convert_marker_tracing_kind(record.kind);
|
|
marker_record.operation = record.operation;
|
|
marker_record.thread_id = record.thread_id;
|
|
marker_record.correlation_id = record.correlation_id;
|
|
marker_record.start_timestamp = ts;
|
|
marker_record.end_timestamp = ts;
|
|
tool::write_ring_buffer(marker_record, domain_type::MARKER);
|
|
}
|
|
}
|
|
else if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA)
|
|
{
|
|
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
|
|
{
|
|
user_data->value = ts;
|
|
|
|
if(marker_data->args.roctxThreadRangeA.message)
|
|
{
|
|
CHECK_NOTNULL(tool_metadata)
|
|
->add_marker_message(
|
|
record.correlation_id.internal,
|
|
std::string{marker_data->args.roctxThreadRangeA.message});
|
|
}
|
|
}
|
|
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
|
|
{
|
|
auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{};
|
|
marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t);
|
|
marker_record.kind = convert_marker_tracing_kind(record.kind);
|
|
marker_record.operation = record.operation;
|
|
marker_record.thread_id = record.thread_id;
|
|
marker_record.correlation_id = record.correlation_id;
|
|
marker_record.start_timestamp = user_data->value;
|
|
marker_record.end_timestamp = ts;
|
|
|
|
tool::write_ring_buffer(marker_record, domain_type::MARKER);
|
|
}
|
|
}
|
|
else if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA)
|
|
{
|
|
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
|
|
{
|
|
user_data->value = ts;
|
|
|
|
if(marker_data->args.roctxProcessRangeA.message)
|
|
{
|
|
CHECK_NOTNULL(tool_metadata)
|
|
->add_marker_message(
|
|
record.correlation_id.internal,
|
|
std::string{marker_data->args.roctxProcessRangeA.message});
|
|
}
|
|
}
|
|
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
|
|
{
|
|
auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{};
|
|
marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t);
|
|
marker_record.kind = convert_marker_tracing_kind(record.kind);
|
|
marker_record.operation = record.operation;
|
|
marker_record.thread_id = record.thread_id;
|
|
marker_record.correlation_id = record.correlation_id;
|
|
marker_record.start_timestamp = user_data->value;
|
|
marker_record.end_timestamp = ts;
|
|
|
|
tool::write_ring_buffer(marker_record, domain_type::MARKER);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
|
|
{
|
|
user_data->value = ts;
|
|
}
|
|
else
|
|
{
|
|
auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{};
|
|
marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t);
|
|
marker_record.kind = convert_marker_tracing_kind(record.kind);
|
|
marker_record.operation = record.operation;
|
|
marker_record.thread_id = record.thread_id;
|
|
marker_record.correlation_id = record.correlation_id;
|
|
marker_record.start_timestamp = user_data->value;
|
|
marker_record.end_timestamp = ts;
|
|
tool::write_ring_buffer(marker_record, domain_type::MARKER);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ROCP_CI_LOG(INFO) << fmt::format(
|
|
"Unsupported operation for {}",
|
|
tool_metadata->get_operation_name(record.kind, record.operation));
|
|
}
|
|
|
|
(void) data;
|
|
}
|
|
|
|
void
|
|
code_object_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
|
rocprofiler_user_data_t* user_data,
|
|
void* data)
|
|
{
|
|
auto ts = rocprofiler_timestamp_t{};
|
|
ROCPROFILER_CALL(rocprofiler_get_timestamp(&ts), "get timestamp");
|
|
if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT &&
|
|
record.operation == ROCPROFILER_CODE_OBJECT_LOAD)
|
|
{
|
|
if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD)
|
|
{
|
|
auto* obj_data = static_cast<tool::rocprofiler_code_object_info_t*>(record.payload);
|
|
|
|
CHECK_NOTNULL(tool_metadata)->add_code_object(*obj_data);
|
|
if(tool::get_config().pc_sampling_host_trap ||
|
|
tool::get_config().pc_sampling_stochastic)
|
|
{
|
|
CHECK_NOTNULL(tool_metadata)->add_decoder(obj_data);
|
|
}
|
|
|
|
if(obj_data->storage_type == ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY &&
|
|
tool::get_config().advanced_thread_trace)
|
|
{
|
|
const char* gpu_name = tool_metadata->agents_map.at(obj_data->rocp_agent).name;
|
|
auto filename = fmt::format("{}_code_object_id_{}",
|
|
std::string(gpu_name),
|
|
std::to_string(obj_data->code_object_id));
|
|
auto output_stream = get_output_stream(tool::get_config(), filename, ".out");
|
|
std::string output_filename =
|
|
get_output_filename(tool::get_config(), filename, ".out");
|
|
|
|
// NOLINTNEXTLINE(performance-no-int-to-ptr)
|
|
output_stream.stream->write(reinterpret_cast<char*>(obj_data->memory_base),
|
|
obj_data->memory_size);
|
|
tool_metadata->code_object_load.wlock(
|
|
[](auto& data_vec,
|
|
std::string file_name,
|
|
tool::rocprofiler_code_object_info_t* obj_data_v) {
|
|
data_vec.push_back({file_name,
|
|
obj_data_v->code_object_id,
|
|
obj_data_v->load_base,
|
|
obj_data_v->load_size});
|
|
},
|
|
output_filename,
|
|
obj_data);
|
|
}
|
|
else if(obj_data->storage_type == ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE &&
|
|
tool::get_config().advanced_thread_trace)
|
|
{
|
|
const char* gpu_name = tool_metadata->agents_map.at(obj_data->rocp_agent).name;
|
|
auto filename = fmt::format("{}_code_object_id_{}",
|
|
std::string(gpu_name),
|
|
std::to_string(obj_data->code_object_id));
|
|
auto output_stream = get_output_stream(tool::get_config(), filename, ".out");
|
|
std::string output_filename =
|
|
get_output_filename(tool::get_config(), filename, ".out");
|
|
|
|
uint8_t* binary = nullptr;
|
|
size_t buffer_size = 0;
|
|
std::ifstream code_object_file(obj_data->uri, std::ios::binary | std::ios::ate);
|
|
if(code_object_file.good())
|
|
{
|
|
buffer_size = code_object_file.tellg();
|
|
code_object_file.seekg(0, std::ios::beg);
|
|
binary = new(std::nothrow) uint8_t[buffer_size];
|
|
if(binary &&
|
|
!code_object_file.read(reinterpret_cast<char*>(binary), buffer_size))
|
|
{
|
|
delete[] binary;
|
|
binary = nullptr;
|
|
}
|
|
}
|
|
// NOLINTBEGIN(performance-no-int-to-ptr)
|
|
output_stream.stream->write(reinterpret_cast<char*>(obj_data->memory_base),
|
|
obj_data->memory_size);
|
|
// NOLINTEND(performance-no-int-to-ptr)
|
|
tool_metadata->code_object_load.wlock(
|
|
[](auto& data_vec,
|
|
std::string file_name,
|
|
tool::rocprofiler_code_object_info_t* obj_data_v) {
|
|
data_vec.push_back({file_name,
|
|
obj_data_v->code_object_id,
|
|
obj_data_v->load_base,
|
|
obj_data_v->load_size});
|
|
},
|
|
output_filename,
|
|
obj_data);
|
|
}
|
|
}
|
|
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_UNLOAD)
|
|
{
|
|
flush();
|
|
}
|
|
}
|
|
|
|
if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT &&
|
|
record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER)
|
|
{
|
|
auto* sym_data = static_cast<tool::rocprofiler_kernel_symbol_info_t*>(record.payload);
|
|
if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD)
|
|
{
|
|
ROCP_TRACE << fmt::format("adding kernel symbol info for kernel_id={} :: {}",
|
|
sym_data->kernel_id,
|
|
sym_data->kernel_name);
|
|
|
|
auto success = CHECK_NOTNULL(tool_metadata)
|
|
->add_kernel_symbol(kernel_symbol_info{
|
|
get_dereference(sym_data),
|
|
[](const char* val) { return tool::format_name(val); }});
|
|
|
|
ROCP_WARNING_IF(!success)
|
|
<< "duplicate kernel symbol data for kernel_id=" << sym_data->kernel_id;
|
|
|
|
// add the kernel to the kernel_targets if
|
|
if(success)
|
|
{
|
|
// if kernel name is provided by user then by default all kernels in the
|
|
// application are targeted
|
|
const auto* kernel_info =
|
|
CHECK_NOTNULL(tool_metadata)->get_kernel_symbol(sym_data->kernel_id);
|
|
auto kernel_filter_include = tool::get_config().kernel_filter_include;
|
|
auto kernel_filter_exclude = tool::get_config().kernel_filter_exclude;
|
|
auto kernel_filter_range = tool::get_config().kernel_filter_range;
|
|
|
|
std::regex include_regex(kernel_filter_include);
|
|
std::regex exclude_regex(kernel_filter_exclude);
|
|
if(std::regex_search(kernel_info->formatted_kernel_name, include_regex))
|
|
{
|
|
if(kernel_filter_exclude.empty() ||
|
|
!std::regex_search(kernel_info->formatted_kernel_name, exclude_regex))
|
|
add_kernel_target(sym_data->kernel_id, kernel_filter_range);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if(record.kind == ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT &&
|
|
record.operation == ROCPROFILER_CODE_OBJECT_HOST_KERNEL_SYMBOL_REGISTER)
|
|
{
|
|
auto* hst_data = static_cast<rocprofiler_host_kernel_symbol_data_t*>(record.payload);
|
|
if(record.phase == ROCPROFILER_CALLBACK_PHASE_LOAD)
|
|
{
|
|
auto success = CHECK_NOTNULL(tool_metadata)
|
|
->add_host_function(host_function_info{
|
|
get_dereference(hst_data),
|
|
[](const char* val) { return tool::format_name(val); }});
|
|
ROCP_WARNING_IF(!success)
|
|
<< "duplicate host function found for kernel_id=" << hst_data->kernel_id;
|
|
|
|
// TODO : kernel filtering for host functions?!
|
|
}
|
|
}
|
|
|
|
(void) user_data;
|
|
(void) data;
|
|
}
|
|
|
|
void
|
|
dummy_buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
|
|
rocprofiler_buffer_id_t /*buffer_id*/,
|
|
rocprofiler_record_header_t** /*headers*/,
|
|
size_t /*num_headers*/,
|
|
void* /*user_data*/,
|
|
uint64_t /*drop_count*/)
|
|
{}
|
|
|
|
void
|
|
buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
|
|
rocprofiler_buffer_id_t /*buffer_id*/,
|
|
rocprofiler_record_header_t** headers,
|
|
size_t num_headers,
|
|
void* /* user_data*/,
|
|
uint64_t /*drop_count*/)
|
|
{
|
|
ROCP_INFO << "Executing buffered tracing callback for " << num_headers << " headers";
|
|
|
|
if(!headers) return;
|
|
|
|
for(size_t i = 0; i < num_headers; ++i)
|
|
{
|
|
auto* header = headers[i];
|
|
|
|
if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING)
|
|
{
|
|
if(header->kind == ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH)
|
|
{
|
|
auto* record = static_cast<rocprofiler_buffer_tracing_kernel_dispatch_record_t*>(
|
|
header->payload);
|
|
|
|
auto stream_id = get_stream_id(record);
|
|
tool::write_ring_buffer(
|
|
tool::tool_buffer_tracing_kernel_dispatch_ext_record_t{*record, stream_id},
|
|
domain_type::KERNEL_DISPATCH);
|
|
}
|
|
else if(header->kind == ROCPROFILER_BUFFER_TRACING_HSA_CORE_API ||
|
|
header->kind == ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API ||
|
|
header->kind == ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API ||
|
|
header->kind == ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API)
|
|
{
|
|
auto* record =
|
|
static_cast<rocprofiler_buffer_tracing_hsa_api_record_t*>(header->payload);
|
|
|
|
tool::write_ring_buffer(*record, domain_type::HSA);
|
|
}
|
|
else if(header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_COPY)
|
|
{
|
|
auto* record =
|
|
static_cast<rocprofiler_buffer_tracing_memory_copy_record_t*>(header->payload);
|
|
|
|
auto stream_id = get_stream_id(record);
|
|
tool::write_ring_buffer(
|
|
tool::tool_buffer_tracing_memory_copy_ext_record_t{*record, stream_id},
|
|
domain_type::MEMORY_COPY);
|
|
}
|
|
else if(header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION)
|
|
{
|
|
auto* record = static_cast<rocprofiler_buffer_tracing_memory_allocation_record_t*>(
|
|
header->payload);
|
|
|
|
auto stream_id = get_stream_id(record);
|
|
tool::write_ring_buffer(
|
|
tool::tool_buffer_tracing_memory_allocation_ext_record_t{*record, stream_id},
|
|
domain_type::MEMORY_ALLOCATION);
|
|
}
|
|
else if(header->kind == ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY)
|
|
{
|
|
auto* record = static_cast<rocprofiler_buffer_tracing_scratch_memory_record_t*>(
|
|
header->payload);
|
|
|
|
tool::write_ring_buffer(*record, domain_type::SCRATCH_MEMORY);
|
|
}
|
|
else if(header->kind == ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT ||
|
|
header->kind == ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT)
|
|
{
|
|
auto* record =
|
|
static_cast<rocprofiler_buffer_tracing_hip_api_ext_record_t*>(header->payload);
|
|
|
|
auto stream_id = get_stream_id(record);
|
|
tool::write_ring_buffer(
|
|
tool::tool_buffer_tracing_hip_api_ext_record_t{*record, stream_id},
|
|
domain_type::HIP);
|
|
}
|
|
else if(header->kind == ROCPROFILER_BUFFER_TRACING_RCCL_API)
|
|
{
|
|
auto* record =
|
|
static_cast<rocprofiler_buffer_tracing_rccl_api_record_t*>(header->payload);
|
|
|
|
tool::write_ring_buffer(*record, domain_type::RCCL);
|
|
}
|
|
else if(header->kind == ROCPROFILER_BUFFER_TRACING_ROCDECODE_API_EXT)
|
|
{
|
|
auto* record = static_cast<rocprofiler_buffer_tracing_rocdecode_api_ext_record_t*>(
|
|
header->payload);
|
|
|
|
tool::write_ring_buffer(*record, domain_type::ROCDECODE);
|
|
}
|
|
else if(header->kind == ROCPROFILER_BUFFER_TRACING_ROCJPEG_API)
|
|
{
|
|
auto* record =
|
|
static_cast<rocprofiler_buffer_tracing_rocjpeg_api_record_t*>(header->payload);
|
|
|
|
tool::write_ring_buffer(*record, domain_type::ROCJPEG);
|
|
}
|
|
else
|
|
{
|
|
ROCP_CI_LOG(WARNING) << fmt::format(
|
|
"unsupported ROCPROFILER_BUFFER_CATEGORY_TRACING kind: {} :: {}",
|
|
header->kind,
|
|
tool_metadata->get_kind_name(
|
|
static_cast<rocprofiler_buffer_tracing_kind_t>(header->kind)));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ROCP_CI_LOG(WARNING) << fmt::format(
|
|
"unsupported category + kind: {} + {}", header->category, header->kind);
|
|
}
|
|
}
|
|
}
|
|
|
|
using counter_vec_t = std::vector<rocprofiler_counter_id_t>;
|
|
using agent_counter_map_t =
|
|
std::unordered_map<rocprofiler_agent_id_t, std::optional<rocprofiler_counter_config_id_t>>;
|
|
|
|
auto
|
|
get_gpu_agents()
|
|
{
|
|
return CHECK_NOTNULL(tool_metadata)->get_gpu_agents();
|
|
}
|
|
|
|
auto
|
|
get_agent_counter_info()
|
|
{
|
|
return CHECK_NOTNULL(tool_metadata)->agent_counter_info;
|
|
}
|
|
|
|
struct agent_profiles
|
|
{
|
|
std::unordered_map<rocprofiler_agent_id_t, std::atomic<uint64_t>> current_iter;
|
|
const uint64_t rotation;
|
|
const std::unordered_map<rocprofiler_agent_id_t, std::vector<rocprofiler_counter_config_id_t>>
|
|
profiles;
|
|
};
|
|
|
|
std::optional<rocprofiler_counter_config_id_t>
|
|
construct_counter_collection_profile(rocprofiler_agent_id_t agent_id,
|
|
const std::set<std::string>& counters)
|
|
{
|
|
static const auto gpu_agents_counter_info = get_agent_counter_info();
|
|
auto profile = std::optional<rocprofiler_counter_config_id_t>{};
|
|
auto counters_v = counter_vec_t{};
|
|
auto found_v = std::vector<std::string_view>{};
|
|
const auto* agent_v = tool_metadata->get_agent(agent_id);
|
|
auto expected_v = counters.size();
|
|
|
|
constexpr auto device_qualifier = std::string_view{":device="};
|
|
for(const auto& itr : counters)
|
|
{
|
|
auto name_v = itr;
|
|
if(auto pos = std::string::npos; (pos = itr.find(device_qualifier)) != std::string::npos)
|
|
{
|
|
name_v = itr.substr(0, pos);
|
|
auto dev_id_s = itr.substr(pos + device_qualifier.length());
|
|
|
|
ROCP_FATAL_IF(dev_id_s.empty() ||
|
|
dev_id_s.find_first_not_of("0123456789") != std::string::npos)
|
|
<< "invalid device qualifier format (':device=N) where N is the "
|
|
"GPU "
|
|
"id: "
|
|
<< itr;
|
|
|
|
auto dev_id_v = std::stol(dev_id_s);
|
|
// skip this counter if the counter is for a specific device id (which
|
|
// doesn't this agent's device id)
|
|
if(dev_id_v != agent_v->gpu_index)
|
|
{
|
|
--expected_v; // is not expected
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// search the gpu agent counter info for a counter with a matching name
|
|
for(const auto& citr : gpu_agents_counter_info.at(agent_id))
|
|
{
|
|
if(name_v == std::string_view{citr.name})
|
|
{
|
|
counters_v.emplace_back(citr.id);
|
|
found_v.emplace_back(itr);
|
|
}
|
|
}
|
|
}
|
|
|
|
if(expected_v != counters_v.size())
|
|
{
|
|
auto requested_counters =
|
|
fmt::format("{}", fmt::join(counters.begin(), counters.end(), ", "));
|
|
auto found_counters = fmt::format("{}", fmt::join(found_v.begin(), found_v.end(), ", "));
|
|
ROCP_WARNING << "Unable to find all counters for agent " << agent_v->node_id << " (gpu-"
|
|
<< agent_v->gpu_index << ", " << agent_v->name << ") in ["
|
|
<< requested_counters << "]. Found: [" << found_counters << "]";
|
|
}
|
|
|
|
if(!counters_v.empty())
|
|
{
|
|
auto profile_v = rocprofiler_counter_config_id_t{};
|
|
ROCPROFILER_CALL(rocprofiler_create_counter_config(
|
|
agent_id, counters_v.data(), counters_v.size(), &profile_v),
|
|
"Could not construct profile cfg");
|
|
profile = profile_v;
|
|
}
|
|
return profile;
|
|
}
|
|
|
|
/// Builds the per-agent counter-collection profiles for every agent returned by
/// get_gpu_agents(), one profile per configured counter set that yields a
/// non-empty configuration.
///
/// Every agent gets a call counter initialised to zero, including agents whose
/// type is not ROCPROFILER_AGENT_TYPE_GPU (for which no profiles are created).
///
/// @return an agent_profiles aggregate holding the counters, the configured
///         counter_groups_interval and the generated profiles
agent_profiles
generate_agent_profiles()
{
    std::unordered_map<rocprofiler_agent_id_t, std::vector<rocprofiler_counter_config_id_t>>
                                                                      profiles;
    std::unordered_map<rocprofiler_agent_id_t, std::atomic<uint64_t>> pos;

    for(const auto& agent : get_gpu_agents())
    {
        // the agent type does not depend on the counter set, so test it once per agent
        if(agent->type == ROCPROFILER_AGENT_TYPE_GPU)
        {
            for(const auto& counter_set : tool::get_config().counters)
            {
                auto profile = construct_counter_collection_profile(agent->id, counter_set);
                if(profile.has_value())
                {
                    profiles[agent->id].push_back(profile.value());
                }
            }
        }
        pos[agent->id] = 0;
    }

    // both maps are local and no longer needed: move them instead of deep-copying
    return agent_profiles{
        std::move(pos), tool::get_config().counter_groups_interval, std::move(profiles)};
}
|
|
|
|
// this function creates a rocprofiler profile config on the first entry
|
|
std::optional<rocprofiler_counter_config_id_t>
|
|
get_device_counting_service(rocprofiler_agent_id_t agent_id)
|
|
{
|
|
static auto agent_profiles = generate_agent_profiles();
|
|
|
|
auto agent_iter = agent_profiles.current_iter.find(agent_id);
|
|
if(agent_iter == agent_profiles.current_iter.end())
|
|
{
|
|
return std::nullopt;
|
|
}
|
|
|
|
auto my_iter = agent_iter->second.fetch_add(1);
|
|
|
|
const auto profiles = agent_profiles.profiles.find(agent_id);
|
|
if(profiles == agent_profiles.profiles.end())
|
|
{
|
|
return std::nullopt;
|
|
}
|
|
|
|
if(profiles->second.empty()) return std::nullopt;
|
|
|
|
uint64_t profile_pos = my_iter / agent_profiles.rotation;
|
|
return profiles->second[profile_pos % profiles->second.size()];
|
|
}
|
|
|
|
int64_t
|
|
get_instruction_index(rocprofiler_pc_t pc)
|
|
{
|
|
if(pc.code_object_id == ROCPROFILER_CODE_OBJECT_ID_NONE)
|
|
return -1;
|
|
else
|
|
return CHECK_NOTNULL(tool_metadata)->get_instruction_index(pc);
|
|
}
|
|
|
|
} // namespace
|
|
|
|
std::vector<rocprofiler_thread_trace_parameter_t>
|
|
get_att_perfcounter_params(rocprofiler_agent_id_t agent,
|
|
std::vector<rocprofiler::tool::att_perfcounter>& att_perf_counters)
|
|
{
|
|
std::vector<rocprofiler_thread_trace_parameter_t> _data{};
|
|
if(att_perf_counters.empty()) return _data;
|
|
|
|
static const auto agent_counter_info = get_agent_counter_info();
|
|
|
|
for(const auto& att_perf_counter : att_perf_counters)
|
|
{
|
|
bool counter_found = false;
|
|
|
|
for(const auto& counter_info_ : agent_counter_info.at(agent))
|
|
{
|
|
if(std::string_view(counter_info_.name) != att_perf_counter.counter_name) continue;
|
|
|
|
auto param = rocprofiler_thread_trace_parameter_t{};
|
|
param.type = ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER;
|
|
param.counter_id = counter_info_.id;
|
|
param.simd_mask = att_perf_counter.simd_mask;
|
|
_data.emplace_back(param);
|
|
counter_found = true;
|
|
break;
|
|
}
|
|
|
|
ROCP_WARNING_IF(!counter_found)
|
|
<< "Agent " << agent.handle << " counter not found: " << att_perf_counter.counter_name;
|
|
}
|
|
|
|
return _data;
|
|
}
|
|
|
|
void
|
|
pc_sampling_callback(rocprofiler_context_id_t /* context_id*/,
|
|
rocprofiler_buffer_id_t /* buffer_id*/,
|
|
rocprofiler_record_header_t** headers,
|
|
size_t num_headers,
|
|
void* /*data*/,
|
|
uint64_t /* drop_count*/)
|
|
{
|
|
if(!headers) return;
|
|
|
|
// count number of valid VS invalid samples delivered by this callback
|
|
uint64_t valid_samples_cnt = 0;
|
|
uint64_t invalid_samples_cnt = 0;
|
|
|
|
for(size_t i = 0; i < num_headers; i++)
|
|
{
|
|
auto* cur_header = headers[i];
|
|
|
|
if(cur_header == nullptr)
|
|
{
|
|
ROCP_CI_LOG(WARNING) << "rocprofiler provided a null pointer to buffer record header. "
|
|
"this should never happen";
|
|
continue;
|
|
}
|
|
|
|
else if(cur_header->category == ROCPROFILER_BUFFER_CATEGORY_PC_SAMPLING)
|
|
{
|
|
if(cur_header->kind == ROCPROFILER_PC_SAMPLING_RECORD_HOST_TRAP_V0_SAMPLE)
|
|
{
|
|
auto* pc_sample = static_cast<rocprofiler_pc_sampling_record_host_trap_v0_t*>(
|
|
cur_header->payload);
|
|
|
|
auto pc_sample_tool_record =
|
|
rocprofiler::tool::rocprofiler_tool_pc_sampling_host_trap_record_t(
|
|
*pc_sample, get_instruction_index(pc_sample->pc));
|
|
|
|
rocprofiler::tool::write_ring_buffer(pc_sample_tool_record,
|
|
domain_type::PC_SAMPLING_HOST_TRAP);
|
|
|
|
valid_samples_cnt++;
|
|
}
|
|
else if(cur_header->kind == ROCPROFILER_PC_SAMPLING_RECORD_STOCHASTIC_V0_SAMPLE)
|
|
{
|
|
auto* pc_sample = static_cast<rocprofiler_pc_sampling_record_stochastic_v0_t*>(
|
|
cur_header->payload);
|
|
|
|
auto pc_sample_tool_record =
|
|
rocprofiler::tool::rocprofiler_tool_pc_sampling_stochastic_record_t(
|
|
*pc_sample, get_instruction_index(pc_sample->pc));
|
|
|
|
rocprofiler::tool::write_ring_buffer(pc_sample_tool_record,
|
|
domain_type::PC_SAMPLING_STOCHASTIC);
|
|
valid_samples_cnt++;
|
|
}
|
|
else if(cur_header->kind == ROCPROFILER_PC_SAMPLING_RECORD_INVALID_SAMPLE)
|
|
{
|
|
invalid_samples_cnt++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
ROCP_FATAL << "unexpected rocprofiler_record_header_t category + kind";
|
|
}
|
|
}
|
|
|
|
// sum up number of valid/invalid samples for pc sampling stats
|
|
tool_metadata->pc_sampling_stats.wlock(
|
|
[valid_samples_cnt, invalid_samples_cnt](auto& pc_sampling_stats) {
|
|
pc_sampling_stats.valid_samples += valid_samples_cnt;
|
|
pc_sampling_stats.invalid_samples += invalid_samples_cnt;
|
|
});
|
|
}
|
|
|
|
void
|
|
att_shader_data_callback(rocprofiler_agent_id_t agent,
|
|
int64_t se_id,
|
|
void* se_data,
|
|
size_t data_size,
|
|
rocprofiler_user_data_t userdata)
|
|
{
|
|
std::lock_guard<std::mutex> lock(att_shader_data);
|
|
std::stringstream filename;
|
|
filename << fmt::format("{}_shader_engine_{}_{}", agent.handle, se_id, userdata.value);
|
|
|
|
auto dispatch_id = static_cast<rocprofiler_dispatch_id_t>(userdata.value);
|
|
auto output_stream = get_output_stream(tool::get_config(), filename.str(), ".att");
|
|
std::string output_filename = get_output_filename(tool::get_config(), filename.str(), ".att");
|
|
|
|
output_stream.stream->write(reinterpret_cast<char*>(se_data), data_size);
|
|
tool_metadata->att_filenames[dispatch_id].first = agent;
|
|
tool_metadata->att_filenames[dispatch_id].second.emplace_back(output_filename);
|
|
}
|
|
|
|
/// Thread-trace dispatch callback: stores the dispatch id in @p userdata_shader
/// and decides whether thread trace is enabled for this dispatch.
///
/// @return ROCPROFILER_THREAD_TRACE_CONTROL_START_AND_STOP when
///         is_targeted_kernel() accepts @p kernel_id, otherwise
///         ROCPROFILER_THREAD_TRACE_CONTROL_NONE
rocprofiler_thread_trace_control_flags_t
att_dispatch_callback(rocprofiler_agent_id_t /* agent_id */,
                      rocprofiler_queue_id_t /* queue_id */,
                      rocprofiler_async_correlation_id_t /* correlation_id */,
                      rocprofiler_kernel_id_t   kernel_id,
                      rocprofiler_dispatch_id_t dispatch_id,
                      void* /*userdata_config*/,
                      rocprofiler_user_data_t* userdata_shader)
{
    // iteration state kept across calls of this callback
    static auto kernel_iteration = common::Synchronized<kernel_iteration_t, true>{};

    // the dispatch id is always handed on, whether or not the kernel is targeted
    userdata_shader->value = dispatch_id;

    const bool targeted = is_targeted_kernel(kernel_id, kernel_iteration);
    if(!targeted) return ROCPROFILER_THREAD_TRACE_CONTROL_NONE;

    return ROCPROFILER_THREAD_TRACE_CONTROL_START_AND_STOP;
}
|
|
|
|
void
|
|
counter_dispatch_callback(rocprofiler_dispatch_counting_service_data_t dispatch_data,
|
|
rocprofiler_counter_config_id_t* config,
|
|
rocprofiler_user_data_t* user_data,
|
|
void* /*callback_data_args*/)
|
|
{
|
|
static auto kernel_iteration = common::Synchronized<kernel_iteration_t, true>{};
|
|
|
|
auto kernel_id = dispatch_data.dispatch_info.kernel_id;
|
|
auto agent_id = dispatch_data.dispatch_info.agent_id;
|
|
|
|
if(!is_targeted_kernel(kernel_id, kernel_iteration))
|
|
{
|
|
return;
|
|
}
|
|
else if(auto profile = get_device_counting_service(agent_id))
|
|
{
|
|
*config = *profile;
|
|
user_data->value = common::get_tid();
|
|
}
|
|
}
|
|
|
|
void
|
|
counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_data,
|
|
rocprofiler_counter_record_t* record_data,
|
|
size_t record_count,
|
|
rocprofiler_user_data_t user_data,
|
|
void* /*callback_data_args*/)
|
|
{
|
|
static const auto gpu_agents = get_gpu_agents();
|
|
static const auto gpu_agents_counter_info = get_agent_counter_info();
|
|
|
|
auto counter_record = tool::tool_counter_record_t{};
|
|
|
|
// must call get_stream_id on dispatch_data before copying to counter_record.dispatch_data
|
|
// so that external correlation id is updated before copy is made
|
|
counter_record.stream_id = get_stream_id(&dispatch_data);
|
|
counter_record.dispatch_data = dispatch_data;
|
|
counter_record.thread_id = user_data.value;
|
|
auto serialized_records = std::vector<tool::tool_counter_value_t>{};
|
|
serialized_records.reserve(record_count);
|
|
|
|
for(size_t count = 0; count < record_count; ++count)
|
|
{
|
|
auto _counter_id = rocprofiler_counter_id_t{};
|
|
ROCPROFILER_CALL(rocprofiler_query_record_counter_id(record_data[count].id, &_counter_id),
|
|
"query record counter id");
|
|
serialized_records.emplace_back(
|
|
tool::tool_counter_value_t{_counter_id, record_data[count].counter_value});
|
|
}
|
|
|
|
if(!serialized_records.empty())
|
|
{
|
|
counter_record.write(serialized_records);
|
|
tool::write_ring_buffer(counter_record, domain_type::COUNTER_COLLECTION);
|
|
}
|
|
}
|
|
|
|
rocprofiler_client_finalize_t client_finalizer = nullptr;
|
|
rocprofiler_client_id_t* client_identifier = nullptr;
|
|
|
|
void
|
|
initialize_logging()
|
|
{
|
|
static auto _once = std::atomic<uint64_t>{0};
|
|
if(_once++ == 0)
|
|
{
|
|
auto logging_cfg = rocprofiler::common::logging_config{.install_failure_handler = true};
|
|
common::init_logging("ROCPROF", logging_cfg);
|
|
FLAGS_colorlogtostderr = true;
|
|
}
|
|
}
|
|
|
|
void
|
|
initialize_rocprofv3()
|
|
{
|
|
ROCP_INFO << "initializing rocprofv3...";
|
|
|
|
if(int status = 0;
|
|
rocprofiler_is_initialized(&status) == ROCPROFILER_STATUS_SUCCESS && status == 0)
|
|
{
|
|
ROCPROFILER_CALL(rocprofiler_force_configure(&rocprofiler_configure),
|
|
"force configuration");
|
|
}
|
|
|
|
ROCP_FATAL_IF(!client_identifier) << "nullptr to client identifier!";
|
|
ROCP_FATAL_IF(!client_finalizer && !tool::get_config().list_metrics)
|
|
<< "nullptr to client finalizer!"; // exception for listing metrics
|
|
}
|
|
|
|
void
|
|
initialize_signal_handler(sigaction_func_t sigaction_func)
|
|
{
|
|
if(sigaction_func == nullptr) sigaction_func = &sigaction;
|
|
|
|
struct sigaction sig_act = {};
|
|
sigemptyset(&sig_act.sa_mask);
|
|
sig_act.sa_flags = (SA_SIGINFO | SA_RESETHAND | SA_NOCLDSTOP);
|
|
sig_act.sa_sigaction = &rocprofv3_error_signal_handler;
|
|
for(auto signal_v : rocprofv3_handled_signals)
|
|
{
|
|
if(get_chained_signals().at(signal_v))
|
|
{
|
|
ROCP_INFO << "Skipping install of signal handler for signal " << signal_v
|
|
<< " (already wrapped)";
|
|
continue;
|
|
}
|
|
|
|
ROCP_INFO << "Installing signal handler for signal " << signal_v;
|
|
if(sigaction_func(signal_v, &sig_act, nullptr) != 0)
|
|
{
|
|
auto _errno_v = errno;
|
|
ROCP_ERROR << "error setting signal handler for " << signal_v
|
|
<< " :: " << strerror(_errno_v);
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
finalize_rocprofv3(std::string_view context)
|
|
{
|
|
ROCP_INFO << "invoked: finalize_rocprofv3";
|
|
if(client_finalizer && client_identifier)
|
|
{
|
|
ROCP_INFO << "finalizing rocprofv3: caller='" << context << "'...";
|
|
client_finalizer(*client_identifier);
|
|
client_finalizer = nullptr;
|
|
client_identifier = nullptr;
|
|
}
|
|
else
|
|
{
|
|
ROCP_INFO << "finalize_rocprofv3('" << context << "') ignored: already finalized";
|
|
}
|
|
}
|
|
|
|
bool
|
|
if_pc_sample_config_match(rocprofiler_agent_id_t agent_id,
|
|
rocprofiler_pc_sampling_method_t pc_sampling_method,
|
|
rocprofiler_pc_sampling_unit_t pc_sampling_unit,
|
|
uint64_t pc_sampling_interval)
|
|
{
|
|
auto pc_sampling_config = CHECK_NOTNULL(tool_metadata)->get_pc_sample_config_info(agent_id);
|
|
if(!pc_sampling_config.empty())
|
|
{
|
|
for(auto config : pc_sampling_config)
|
|
{
|
|
if(config.method == pc_sampling_method && config.unit == pc_sampling_unit &&
|
|
config.min_interval <= pc_sampling_interval &&
|
|
config.max_interval >= pc_sampling_interval)
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void
|
|
configure_pc_sampling_on_all_agents(uint64_t buffer_size,
|
|
uint64_t buffer_watermark,
|
|
void* tool_data,
|
|
rocprofiler_buffer_tracing_cb_t pc_sampling_cb)
|
|
{
|
|
auto method = tool::get_config().pc_sampling_method_value;
|
|
auto unit = tool::get_config().pc_sampling_unit_value;
|
|
|
|
// Find the proper buffer_id based on the method
|
|
auto* buffer_id = (method == ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP)
|
|
? &get_buffers().pc_sampling_host_trap
|
|
: &get_buffers().pc_sampling_stochastic;
|
|
|
|
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
|
|
buffer_size,
|
|
buffer_watermark,
|
|
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
|
pc_sampling_cb,
|
|
tool_data,
|
|
buffer_id),
|
|
"buffer creation");
|
|
|
|
bool config_match_found = false;
|
|
auto agent_ptr_vec = get_gpu_agents();
|
|
for(auto& itr : agent_ptr_vec)
|
|
{
|
|
if(if_pc_sample_config_match(
|
|
itr->id, method, unit, tool::get_config().pc_sampling_interval))
|
|
{
|
|
config_match_found = true;
|
|
int flags = 0;
|
|
ROCPROFILER_CALL(
|
|
rocprofiler_configure_pc_sampling_service(get_client_ctx(),
|
|
itr->id,
|
|
method,
|
|
unit,
|
|
tool::get_config().pc_sampling_interval,
|
|
*buffer_id,
|
|
flags),
|
|
"configure PC sampling");
|
|
}
|
|
}
|
|
if(!config_match_found)
|
|
{
|
|
ROCP_ERROR << "Given PC sampling configuration is not supported on any of the agents";
|
|
std::exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
// tag type selecting the tracing_callbacks_t constructor which installs the real callbacks
struct real_callbacks_t
{};

// tag type selecting the tracing_callbacks_t constructor which installs the dummy callbacks
struct dummy_callbacks_t
{};

// tag values passed to the tracing_callbacks_t constructors
constexpr auto use_real_callbacks  = real_callbacks_t{};
constexpr auto use_dummy_callbacks = dummy_callbacks_t{};
|
|
|
|
struct tracing_callbacks_t
|
|
{
|
|
tracing_callbacks_t() = delete;
|
|
|
|
tracing_callbacks_t(real_callbacks_t)
|
|
: code_object_tracing{code_object_tracing_callback}
|
|
, cntrl_tracing{cntrl_tracing_callback}
|
|
, kernel_rename{kernel_rename_callback}
|
|
, hip_stream{hip_stream_display_callback}
|
|
, callback_tracing{callback_tracing_callback}
|
|
, buffered_tracing{buffered_tracing_callback}
|
|
, pc_sampling{pc_sampling_callback}
|
|
, att_dispatch{att_dispatch_callback}
|
|
, att_shader_data{att_shader_data_callback}
|
|
, counter_dispatch{counter_dispatch_callback}
|
|
, counter_record{counter_record_callback}
|
|
{}
|
|
|
|
explicit tracing_callbacks_t(dummy_callbacks_t)
|
|
: code_object_tracing{dummy_callback_tracing_callback}
|
|
, cntrl_tracing{dummy_callback_tracing_callback}
|
|
, kernel_rename{dummy_callback_tracing_callback}
|
|
, hip_stream{dummy_callback_tracing_callback}
|
|
, callback_tracing{dummy_callback_tracing_callback}
|
|
, buffered_tracing{dummy_buffered_tracing_callback}
|
|
, pc_sampling{dummy_buffered_tracing_callback}
|
|
, counter_dispatch{dummy_counter_dispatch_callback}
|
|
, counter_record{dummy_counter_record_callback}
|
|
{}
|
|
|
|
const rocprofiler_callback_tracing_cb_t code_object_tracing = nullptr;
|
|
const rocprofiler_callback_tracing_cb_t cntrl_tracing = nullptr;
|
|
const rocprofiler_callback_tracing_cb_t kernel_rename = nullptr;
|
|
const rocprofiler_callback_tracing_cb_t hip_stream = nullptr;
|
|
const rocprofiler_callback_tracing_cb_t callback_tracing = nullptr;
|
|
const rocprofiler_buffer_tracing_cb_t buffered_tracing = nullptr;
|
|
const rocprofiler_buffer_tracing_cb_t pc_sampling = nullptr;
|
|
const rocprofiler_thread_trace_dispatch_callback_t att_dispatch = nullptr;
|
|
const rocprofiler_thread_trace_shader_data_callback_t att_shader_data = nullptr;
|
|
const rocprofiler_dispatch_counting_service_cb_t counter_dispatch = nullptr;
|
|
const rocprofiler_dispatch_counting_record_cb_t counter_record = nullptr;
|
|
};
|
|
|
|
auto
|
|
get_tracing_callbacks()
|
|
{
|
|
// for the benchmarking modes of sdk buffer/callback overhead, we are measuring the cost
|
|
// of the SDK invoking the callbacks to the tool. We do not want to include the overhead
|
|
// of the tool doing any work so we use "dummy" callbacks (i.e. functions which just
|
|
// immediately return)
|
|
if(tool::get_config().benchmark_mode == tool::config::benchmark::sdk_buffered_overhead ||
|
|
tool::get_config().benchmark_mode == tool::config::benchmark::sdk_callback_overhead ||
|
|
tool::get_config().benchmark_mode == tool::config::benchmark::execution_profile)
|
|
{
|
|
return tracing_callbacks_t{use_dummy_callbacks};
|
|
}
|
|
|
|
return tracing_callbacks_t{use_real_callbacks};
|
|
}
|
|
|
|
int
|
|
tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
|
{
|
|
static constexpr auto null_context_id = rocprofiler_context_id_t{.handle = 0};
|
|
static constexpr auto null_buffer_id = rocprofiler_buffer_id_t{.handle = 0};
|
|
|
|
auto _init_timer = common::simple_timer{"[rocprofv3] tool initialization"};
|
|
|
|
client_finalizer = fini_func;
|
|
|
|
const uint64_t buffer_size = 16 * common::units::get_page_size();
|
|
const uint64_t buffer_watermark = 15 * common::units::get_page_size();
|
|
|
|
tool_metadata->init(tool::metadata::inprocess{});
|
|
|
|
ROCPROFILER_CALL(rocprofiler_create_context(&get_client_ctx()), "create context failed");
|
|
|
|
auto code_obj_ctx = null_context_id;
|
|
ROCPROFILER_CALL(rocprofiler_create_context(&code_obj_ctx), "failed to create context");
|
|
|
|
auto start_context = [](rocprofiler_context_id_t ctx_id, std::string_view msg) {
|
|
using benchmark = tool::config::benchmark;
|
|
// do not start context if we are benchmarking the overhead of a service
|
|
// being available but unused by any contexts
|
|
if(tool::get_config().benchmark_mode != benchmark::disabled_contexts_overhead &&
|
|
ctx_id != null_context_id)
|
|
{
|
|
if(tool::get_config().benchmark_mode == benchmark::execution_profile)
|
|
{
|
|
ROCPROFILER_CHECK(rocprofiler_configure_external_correlation_id_request_service(
|
|
ctx_id, nullptr, 0, record_execution_profile, nullptr));
|
|
}
|
|
|
|
ROCP_INFO << fmt::format("starting {} context...", msg);
|
|
ROCPROFILER_CHECK(rocprofiler_start_context(ctx_id));
|
|
}
|
|
};
|
|
|
|
auto callbacks = get_tracing_callbacks();
|
|
|
|
ROCPROFILER_CALL(
|
|
rocprofiler_configure_callback_tracing_service(code_obj_ctx,
|
|
ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT,
|
|
nullptr,
|
|
0,
|
|
callbacks.code_object_tracing,
|
|
nullptr),
|
|
"code object tracing configure failed");
|
|
|
|
start_context(code_obj_ctx, "code object");
|
|
|
|
if(tool::get_config().marker_api_trace)
|
|
{
|
|
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
|
|
get_client_ctx(),
|
|
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API,
|
|
nullptr,
|
|
0,
|
|
callbacks.callback_tracing,
|
|
nullptr),
|
|
"callback tracing service failed to configure");
|
|
|
|
auto pause_resume_ctx = null_context_id;
|
|
ROCPROFILER_CALL(rocprofiler_create_context(&pause_resume_ctx), "failed to create context");
|
|
|
|
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
|
|
pause_resume_ctx,
|
|
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
|
|
nullptr,
|
|
0,
|
|
callbacks.cntrl_tracing,
|
|
static_cast<void*>(&get_client_ctx())),
|
|
"callback tracing service failed to configure");
|
|
|
|
start_context(pause_resume_ctx, "marker pause/resume");
|
|
}
|
|
|
|
struct buffer_service_config
|
|
{
|
|
bool option = false;
|
|
rocprofiler_buffer_tracing_kind_t kind = ROCPROFILER_BUFFER_TRACING_NONE;
|
|
rocprofiler_buffer_id_t& buffer_id;
|
|
};
|
|
|
|
for(auto&& itr : {buffer_service_config{tool::get_config().kernel_trace,
|
|
ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH,
|
|
get_buffers().kernel_trace},
|
|
buffer_service_config{tool::get_config().memory_copy_trace,
|
|
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
|
|
get_buffers().memory_copy_trace},
|
|
buffer_service_config{tool::get_config().scratch_memory_trace,
|
|
ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY,
|
|
get_buffers().scratch_memory},
|
|
buffer_service_config{tool::get_config().hsa_core_api_trace,
|
|
ROCPROFILER_BUFFER_TRACING_HSA_CORE_API,
|
|
get_buffers().hsa_api_trace},
|
|
buffer_service_config{tool::get_config().hsa_amd_ext_api_trace,
|
|
ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API,
|
|
get_buffers().hsa_api_trace},
|
|
buffer_service_config{tool::get_config().hsa_image_ext_api_trace,
|
|
ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API,
|
|
get_buffers().hsa_api_trace},
|
|
buffer_service_config{tool::get_config().hsa_finalizer_ext_api_trace,
|
|
ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API,
|
|
get_buffers().hsa_api_trace},
|
|
buffer_service_config{tool::get_config().hip_runtime_api_trace,
|
|
ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT,
|
|
get_buffers().hip_api_trace},
|
|
buffer_service_config{tool::get_config().hip_compiler_api_trace,
|
|
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT,
|
|
get_buffers().hip_api_trace},
|
|
buffer_service_config{tool::get_config().rccl_api_trace,
|
|
ROCPROFILER_BUFFER_TRACING_RCCL_API,
|
|
get_buffers().rccl_api_trace},
|
|
buffer_service_config{tool::get_config().memory_allocation_trace,
|
|
ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION,
|
|
get_buffers().memory_allocation_trace},
|
|
buffer_service_config{tool::get_config().rocdecode_api_trace,
|
|
ROCPROFILER_BUFFER_TRACING_ROCDECODE_API_EXT,
|
|
get_buffers().rocdecode_api_trace},
|
|
buffer_service_config{tool::get_config().rocjpeg_api_trace,
|
|
ROCPROFILER_BUFFER_TRACING_ROCJPEG_API,
|
|
get_buffers().rocjpeg_api_trace}})
|
|
{
|
|
if(itr.option)
|
|
{
|
|
// in sdk callback overhead benchmarking, we don't want to use the buffer services
|
|
if(tool::get_config().benchmark_mode == tool::config::benchmark::sdk_callback_overhead)
|
|
continue;
|
|
|
|
if(itr.buffer_id == null_buffer_id)
|
|
{
|
|
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
|
|
buffer_size,
|
|
buffer_watermark,
|
|
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
|
callbacks.buffered_tracing,
|
|
tool_data,
|
|
&itr.buffer_id),
|
|
"buffer creation");
|
|
|
|
ROCP_FATAL_IF(itr.buffer_id.handle == 0) << "failed to create buffer";
|
|
|
|
auto cb_thread = rocprofiler_callback_thread_t{};
|
|
|
|
ROCP_INFO << "creating dedicated callback thread for buffer "
|
|
<< itr.buffer_id.handle;
|
|
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&cb_thread),
|
|
"creating callback thread");
|
|
|
|
ROCP_INFO << "assigning buffer " << itr.buffer_id.handle << " to callback thread "
|
|
<< cb_thread.handle;
|
|
ROCPROFILER_CALL(rocprofiler_assign_callback_thread(itr.buffer_id, cb_thread),
|
|
"assigning callback thread");
|
|
}
|
|
|
|
ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service(
|
|
get_client_ctx(), itr.kind, nullptr, 0, itr.buffer_id),
|
|
"buffer tracing service configure");
|
|
}
|
|
}
|
|
|
|
struct callback_service_config
|
|
{
|
|
bool option = false;
|
|
rocprofiler_callback_tracing_kind_t kind = ROCPROFILER_CALLBACK_TRACING_NONE;
|
|
rocprofiler_callback_tracing_cb_t callback = nullptr;
|
|
};
|
|
|
|
for(auto&& itr : {callback_service_config{tool::get_config().kernel_trace,
|
|
ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH,
|
|
dummy_callback_tracing_callback},
|
|
callback_service_config{tool::get_config().memory_copy_trace,
|
|
ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY,
|
|
dummy_callback_tracing_callback},
|
|
callback_service_config{tool::get_config().scratch_memory_trace,
|
|
ROCPROFILER_CALLBACK_TRACING_SCRATCH_MEMORY,
|
|
dummy_callback_tracing_callback},
|
|
callback_service_config{tool::get_config().hsa_core_api_trace,
|
|
ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API,
|
|
dummy_callback_tracing_callback},
|
|
callback_service_config{tool::get_config().hsa_amd_ext_api_trace,
|
|
ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API,
|
|
dummy_callback_tracing_callback},
|
|
callback_service_config{tool::get_config().hsa_image_ext_api_trace,
|
|
ROCPROFILER_CALLBACK_TRACING_HSA_IMAGE_EXT_API,
|
|
dummy_callback_tracing_callback},
|
|
callback_service_config{tool::get_config().hsa_finalizer_ext_api_trace,
|
|
ROCPROFILER_CALLBACK_TRACING_HSA_FINALIZE_EXT_API,
|
|
dummy_callback_tracing_callback},
|
|
callback_service_config{tool::get_config().hip_runtime_api_trace,
|
|
ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API,
|
|
dummy_callback_tracing_callback},
|
|
callback_service_config{tool::get_config().hip_compiler_api_trace,
|
|
ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API,
|
|
dummy_callback_tracing_callback},
|
|
callback_service_config{tool::get_config().rccl_api_trace,
|
|
ROCPROFILER_CALLBACK_TRACING_RCCL_API,
|
|
dummy_callback_tracing_callback},
|
|
callback_service_config{tool::get_config().memory_allocation_trace,
|
|
ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION,
|
|
dummy_callback_tracing_callback},
|
|
callback_service_config{tool::get_config().rocdecode_api_trace,
|
|
ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API,
|
|
dummy_callback_tracing_callback},
|
|
callback_service_config{tool::get_config().rocjpeg_api_trace,
|
|
ROCPROFILER_CALLBACK_TRACING_ROCJPEG_API,
|
|
dummy_callback_tracing_callback}})
|
|
{
|
|
if(itr.option)
|
|
{
|
|
// the dummy callback services are only configured for sdk callback overhead benchmarking
|
|
if(tool::get_config().benchmark_mode != tool::config::benchmark::sdk_callback_overhead)
|
|
continue;
|
|
|
|
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
|
|
get_client_ctx(), itr.kind, nullptr, 0, itr.callback, nullptr),
|
|
"callback tracing service failed to configure");
|
|
}
|
|
}
|
|
|
|
if(tool::get_config().advanced_thread_trace)
|
|
{
|
|
auto global_parameters = std::vector<rocprofiler_thread_trace_parameter_t>{};
|
|
uint64_t target_cu = tool::get_config().att_param_target_cu;
|
|
uint64_t simd_select = tool::get_config().att_param_simd_select;
|
|
uint64_t buffer_sz = tool::get_config().att_param_buffer_size;
|
|
uint64_t shader_mask = tool::get_config().att_param_shader_engine_mask;
|
|
uint64_t perfcounter_ctrl = tool::get_config().att_param_perf_ctrl;
|
|
auto& att_perf = tool::get_config().att_param_perfcounters;
|
|
bool att_serialize_all = tool::get_config().att_serialize_all;
|
|
|
|
global_parameters.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU, {target_cu}});
|
|
global_parameters.push_back(
|
|
{ROCPROFILER_THREAD_TRACE_PARAMETER_SIMD_SELECT, {simd_select}});
|
|
global_parameters.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_BUFFER_SIZE, {buffer_sz}});
|
|
global_parameters.push_back(
|
|
{ROCPROFILER_THREAD_TRACE_PARAMETER_SHADER_ENGINE_MASK, {shader_mask}});
|
|
global_parameters.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_SERIALIZE_ALL,
|
|
{static_cast<uint64_t>(att_serialize_all)}});
|
|
|
|
if(perfcounter_ctrl != 0 && !att_perf.empty())
|
|
{
|
|
global_parameters.push_back(
|
|
{ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL, {perfcounter_ctrl}});
|
|
}
|
|
else if(perfcounter_ctrl != 0 || !att_perf.empty())
|
|
{
|
|
ROCP_FATAL << "ATT Perf requires setting both perfcounter_ctrl and perfcounter list!";
|
|
}
|
|
|
|
for(auto& [id, agent] : tool_metadata->agents_map)
|
|
{
|
|
if(agent.type != ROCPROFILER_AGENT_TYPE_GPU) continue;
|
|
|
|
auto agent_params = global_parameters;
|
|
for(auto& counter : get_att_perfcounter_params(id, att_perf))
|
|
agent_params.push_back(counter);
|
|
|
|
ROCPROFILER_CALL(
|
|
rocprofiler_configure_dispatch_thread_trace_service(get_client_ctx(),
|
|
id,
|
|
agent_params.data(),
|
|
agent_params.size(),
|
|
callbacks.att_dispatch,
|
|
callbacks.att_shader_data,
|
|
tool_data),
|
|
"thread trace service configure");
|
|
}
|
|
}
|
|
|
|
if(tool::get_config().counter_collection)
|
|
{
|
|
ROCPROFILER_CALL(rocprofiler_create_context(&counter_collection_ctx),
|
|
"failed to create counter collection context");
|
|
ROCPROFILER_CALL(
|
|
rocprofiler_configure_callback_dispatch_counting_service(counter_collection_ctx,
|
|
callbacks.counter_dispatch,
|
|
nullptr,
|
|
callbacks.counter_record,
|
|
nullptr),
|
|
"Could not setup counting service");
|
|
|
|
start_context(counter_collection_ctx, "counter collection");
|
|
}
|
|
|
|
if(tool::get_config().kernel_rename)
|
|
{
|
|
auto rename_ctx = rocprofiler_context_id_t{0};
|
|
auto marker_core_api_kinds = std::array<rocprofiler_tracing_operation_t, 2>{
|
|
ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA,
|
|
ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA,
|
|
};
|
|
|
|
ROCPROFILER_CALL(rocprofiler_create_context(&rename_ctx), "failed to create context");
|
|
|
|
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
|
|
rename_ctx,
|
|
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API,
|
|
marker_core_api_kinds.data(),
|
|
marker_core_api_kinds.size(),
|
|
callbacks.kernel_rename,
|
|
nullptr),
|
|
"callback tracing service failed to configure");
|
|
|
|
start_context(rename_ctx, "kernel rename");
|
|
}
|
|
|
|
if(!tool::get_config().group_by_queue)
|
|
{
|
|
// Track stream ID information via callback service
|
|
auto hip_stream_display_ctx = rocprofiler_context_id_t{0};
|
|
|
|
ROCPROFILER_CALL(rocprofiler_create_context(&hip_stream_display_ctx),
|
|
"failed to create hip stream context");
|
|
|
|
ROCPROFILER_CALL(
|
|
rocprofiler_configure_callback_tracing_service(hip_stream_display_ctx,
|
|
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM,
|
|
nullptr,
|
|
0,
|
|
callbacks.hip_stream,
|
|
nullptr),
|
|
"hip stream tracing configure failed");
|
|
|
|
start_context(hip_stream_display_ctx, "hip stream");
|
|
|
|
// Track if HIP runtime has been initialized via runtime_initialization service
|
|
auto runtime_initialization_ctx = rocprofiler_context_id_t{0};
|
|
|
|
ROCPROFILER_CALL(rocprofiler_create_context(&runtime_initialization_ctx),
|
|
"failed to create runtime initialization context");
|
|
|
|
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
|
|
runtime_initialization_ctx,
|
|
ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION,
|
|
nullptr,
|
|
0,
|
|
runtime_initialization_callback,
|
|
nullptr),
|
|
"runtime initialization tracing configure failed");
|
|
|
|
start_context(runtime_initialization_ctx, "runtime initialization");
|
|
}
|
|
|
|
if((tool::get_config().kernel_rename || !tool::get_config().group_by_queue) &&
|
|
tool::get_config().benchmark_mode != tool::config::benchmark::execution_profile)
|
|
{
|
|
auto external_corr_id_request_kinds =
|
|
std::array<rocprofiler_external_correlation_id_request_kind_t, 4>{
|
|
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH,
|
|
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY,
|
|
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_ALLOCATION,
|
|
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HIP_RUNTIME_API};
|
|
|
|
ROCPROFILER_CALL(rocprofiler_configure_external_correlation_id_request_service(
|
|
get_client_ctx(),
|
|
external_corr_id_request_kinds.data(),
|
|
external_corr_id_request_kinds.size(),
|
|
set_kernel_rename_and_stream_correlation_id,
|
|
nullptr),
|
|
"Could not configure external correlation id request service");
|
|
|
|
if(tool::get_config().counter_collection)
|
|
{
|
|
auto counter_external_corr_id_request_kinds =
|
|
std::array<rocprofiler_external_correlation_id_request_kind_t, 1>{
|
|
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH};
|
|
|
|
ROCPROFILER_CALL(rocprofiler_configure_external_correlation_id_request_service(
|
|
counter_collection_ctx,
|
|
counter_external_corr_id_request_kinds.data(),
|
|
counter_external_corr_id_request_kinds.size(),
|
|
set_kernel_rename_and_stream_correlation_id,
|
|
nullptr),
|
|
"Could not configure external correlation id request service");
|
|
}
|
|
}
|
|
|
|
if(tool::get_config().pc_sampling_host_trap)
|
|
{
|
|
configure_pc_sampling_on_all_agents(
|
|
buffer_size, buffer_watermark, tool_data, callbacks.pc_sampling);
|
|
}
|
|
else if(tool::get_config().pc_sampling_stochastic)
|
|
{
|
|
configure_pc_sampling_on_all_agents(
|
|
buffer_size, buffer_watermark, tool_data, callbacks.pc_sampling);
|
|
}
|
|
|
|
for(auto itr : get_buffers().pc_sampling_buffers_as_array())
|
|
{
|
|
if(itr > null_buffer_id)
|
|
{
|
|
auto cb_thread = rocprofiler_callback_thread_t{};
|
|
|
|
ROCP_INFO << "creating dedicated callback thread for buffer " << itr.handle;
|
|
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&cb_thread),
|
|
"creating callback thread");
|
|
|
|
ROCP_INFO << "assigning buffer " << itr.handle << " to callback thread "
|
|
<< cb_thread.handle;
|
|
ROCPROFILER_CALL(rocprofiler_assign_callback_thread(itr, cb_thread),
|
|
"assigning callback thread");
|
|
}
|
|
}
|
|
|
|
// Handle kernel id of zero
|
|
bool include = std::regex_search("0", std::regex(tool::get_config().kernel_filter_include));
|
|
bool exclude = std::regex_search("0", std::regex(tool::get_config().kernel_filter_exclude));
|
|
if(include && (!exclude || tool::get_config().kernel_filter_exclude.empty()))
|
|
add_kernel_target(0, tool::get_config().kernel_filter_range);
|
|
|
|
if(tool::get_config().benchmark_mode == tool::config::benchmark::disabled_contexts_overhead)
|
|
{
|
|
ROCP_INFO << "rocprofv3 is not recording data because the overhead of inactive contexts is "
|
|
"being benchmarked";
|
|
}
|
|
else if(tool::get_config().selected_regions)
|
|
{
|
|
ROCP_WARNING << "rocprofv3 is only recording profiling data within regions of code "
|
|
"surrounded by roctxProfilerResume(0)/roctxProfilerPause";
|
|
}
|
|
else if(!tool::get_config().collection_periods.empty())
|
|
{
|
|
ROCP_INFO << "rocprofv3 will record data during the defined collection period(s)";
|
|
|
|
auto _prom = std::promise<void>{};
|
|
auto _fut = _prom.get_future();
|
|
std::thread{collection_period_cntrl, std::move(_prom), get_client_ctx()}.detach();
|
|
_fut.wait_for(std::chrono::seconds{1}); // wait for a max of 1 second
|
|
}
|
|
else
|
|
{
|
|
ROCP_INFO << "rocprofv3 will record data starting now";
|
|
|
|
start_context(get_client_ctx(), "primary rocprofv3");
|
|
}
|
|
|
|
tool_metadata->set_process_id(getpid(), getppid());
|
|
// set_process_id should set process_start_ns unless it cannot read from /proc/<pid>/stat
|
|
if(tool_metadata->process_start_ns == 0)
|
|
rocprofiler_get_timestamp(&(tool_metadata->process_start_ns));
|
|
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
api_timestamps_callback(rocprofiler_intercept_table_t table_id,
|
|
uint64_t lib_version,
|
|
uint64_t lib_instance,
|
|
void** /*tables*/,
|
|
uint64_t /*num_tables*/,
|
|
void* /*user_data*/)
|
|
{
|
|
static auto _once = std::once_flag{};
|
|
|
|
// compute major/minor/patch version info
|
|
uint32_t major = lib_version / 10000;
|
|
uint32_t minor = (lib_version % 10000) / 100;
|
|
uint32_t patch = lib_version % 100;
|
|
|
|
const char* table_name = nullptr;
|
|
ROCPROFILER_CHECK(rocprofiler_query_intercept_table_name(table_id, &table_name, nullptr));
|
|
|
|
ROCP_WARNING_IF(table_id != ROCPROFILER_MARKER_CONTROL_TABLE &&
|
|
table_id != ROCPROFILER_MARKER_NAME_TABLE && table_name)
|
|
<< fmt::format("{} version {}.{}.{} initialized (instance={})",
|
|
table_name,
|
|
major,
|
|
minor,
|
|
patch,
|
|
lib_instance);
|
|
|
|
std::call_once(_once, []() {
|
|
if(CHECK_NOTNULL(tool_metadata)->process_start_ns == 0)
|
|
rocprofiler_get_timestamp(&(tool_metadata->process_start_ns));
|
|
});
|
|
}
|
|
|
|
// Local shorthands for the statistics types declared in the tool namespace.
using stats_data_t = tool::stats_data_t;
|
|
using stats_entry_t = tool::stats_entry_t;
|
|
using domain_stats_vec_t = tool::domain_stats_vec_t;
|
|
// Sequence of deferred cleanup callbacks (filled by generate_output, run by tool_fini).
using cleanup_vec_t = std::vector<std::function<void()>>;
|
|
|
|
/// Running totals accumulated while generating output during finalization.
struct output_data
{
    uint64_t num_output{0};  ///< number of services which produced output
    uint64_t num_bytes{0};   ///< sum of the bytes reported by those services
};
|
|
|
|
void
|
|
generate_config_output(const tool::config& cfg, const tool::metadata& tool_metadata_v)
|
|
{
|
|
using JSONOutputArchive = ::cereal::PrettyJSONOutputArchive;
|
|
|
|
constexpr auto json_prec = 16;
|
|
constexpr auto json_indent = JSONOutputArchive::Options::IndentChar::space;
|
|
auto json_opts = JSONOutputArchive::Options{json_prec, json_indent, 2};
|
|
auto filename = std::string_view{"config"};
|
|
|
|
auto stream = get_output_stream(cfg, filename, ".json");
|
|
{
|
|
auto archive = JSONOutputArchive{*stream.stream, json_opts};
|
|
|
|
archive.setNextName("rocprofiler-sdk-tool");
|
|
archive.startNode();
|
|
archive.makeArray();
|
|
archive.startNode(); // first array entry
|
|
|
|
auto timestamps =
|
|
tool::timestamps_t{tool_metadata_v.process_start_ns, tool_metadata_v.process_end_ns};
|
|
|
|
auto this_pid = tool_metadata_v.process_id;
|
|
|
|
archive.setNextName("metadata");
|
|
archive.startNode();
|
|
archive(cereal::make_nvp("pid", this_pid));
|
|
archive(cereal::make_nvp("init_time", timestamps.app_start_time));
|
|
archive(cereal::make_nvp("fini_time", timestamps.app_end_time));
|
|
archive(cereal::make_nvp("config", cfg));
|
|
archive(cereal::make_nvp("command", common::read_command_line(this_pid)));
|
|
|
|
{
|
|
archive.setNextName("build_spec");
|
|
archive.startNode();
|
|
archive(cereal::make_nvp("version_major", ROCPROFILER_VERSION_MAJOR));
|
|
archive(cereal::make_nvp("version_minor", ROCPROFILER_VERSION_MINOR));
|
|
archive(cereal::make_nvp("version_patch", ROCPROFILER_VERSION_PATCH));
|
|
archive(cereal::make_nvp("soversion", ROCPROFILER_SOVERSION));
|
|
archive(cereal::make_nvp("compiler_id", std::string{ROCPROFILER_COMPILER_ID}));
|
|
archive(
|
|
cereal::make_nvp("compiler_version", std::string{ROCPROFILER_COMPILER_VERSION}));
|
|
archive(cereal::make_nvp("git_describe", std::string{ROCPROFILER_GIT_DESCRIBE}));
|
|
archive(cereal::make_nvp("git_revision", std::string{ROCPROFILER_GIT_REVISION}));
|
|
archive(cereal::make_nvp("library_arch", std::string{ROCPROFILER_LIBRARY_ARCH}));
|
|
archive(cereal::make_nvp("system_name", std::string{ROCPROFILER_SYSTEM_NAME}));
|
|
archive(
|
|
cereal::make_nvp("system_processor", std::string{ROCPROFILER_SYSTEM_PROCESSOR}));
|
|
archive(cereal::make_nvp("system_version", std::string{ROCPROFILER_SYSTEM_VERSION}));
|
|
archive.finishNode(); // build_spec
|
|
}
|
|
|
|
// save the execution profile
|
|
if(execution_profile) archive(cereal::make_nvp("profile", execution_profile->get()));
|
|
|
|
// save the environment variables
|
|
{
|
|
archive.setNextName("environment");
|
|
archive.startNode();
|
|
size_t idx = 0;
|
|
while(true)
|
|
{
|
|
const auto* env_entry = environ[idx++];
|
|
if(!env_entry)
|
|
break;
|
|
else if(std::string_view{env_entry}.find('=') != std::string_view::npos)
|
|
{
|
|
auto _entry = std::string{env_entry};
|
|
auto _pos = _entry.find('=');
|
|
auto _name = _entry.substr(0, _pos);
|
|
auto _value = _entry.substr(_pos + 1);
|
|
archive(cereal::make_nvp(_name.c_str(), _value));
|
|
}
|
|
}
|
|
archive.finishNode();
|
|
}
|
|
|
|
archive.finishNode(); // metadata
|
|
archive.finishNode(); // first array entry
|
|
archive.finishNode(); // rocprofiler-sdk-tool
|
|
}
|
|
stream.close();
|
|
}
|
|
|
|
template <typename Tp, domain_type DomainT>
|
|
void
|
|
generate_output(tool::buffered_output<Tp, DomainT>& output_v,
|
|
output_data& output_data_v,
|
|
domain_stats_vec_t& contributions_v,
|
|
cleanup_vec_t& cleanups_v)
|
|
{
|
|
cleanups_v.emplace_back([&output_v]() { output_v.destroy(); });
|
|
|
|
if(!output_v) return;
|
|
|
|
// when benchmarking, we do not generate output
|
|
if(tool::get_config().benchmark_mode != tool::config::benchmark::none) return;
|
|
|
|
// opens temporary file and sets read position to beginning
|
|
output_v.read();
|
|
|
|
if(output_v.get_generator().empty()) return;
|
|
|
|
// if it has reached this point, the generator is not empty
|
|
auto _num_bytes = output_v.get_num_bytes();
|
|
output_data_v.num_output += 1;
|
|
output_data_v.num_bytes += _num_bytes;
|
|
|
|
if(tool::get_config().stats || tool::get_config().summary_output)
|
|
{
|
|
output_v.stats =
|
|
tool::generate_stats(tool::get_config(), *tool_metadata, output_v.get_generator());
|
|
}
|
|
|
|
if(output_v.stats)
|
|
{
|
|
contributions_v.emplace_back(output_v.buffer_type_v, output_v.stats);
|
|
}
|
|
|
|
if(tool::get_config().csv_output && _num_bytes >= tool::get_config().minimum_output_bytes)
|
|
{
|
|
tool::generate_csv(
|
|
tool::get_config(), *tool_metadata, output_v.get_generator(), output_v.stats);
|
|
}
|
|
}
|
|
|
|
void
|
|
tool_fini(void* /*tool_data*/)
|
|
{
|
|
static bool _first = true;
|
|
if(!_first) return;
|
|
_first = false;
|
|
|
|
client_identifier = nullptr;
|
|
client_finalizer = nullptr;
|
|
|
|
auto _fini_timer = common::simple_timer{"[rocprofv3] tool finalization"};
|
|
|
|
if(tool_metadata->process_end_ns == 0)
|
|
rocprofiler_get_timestamp(&(tool_metadata->process_end_ns));
|
|
|
|
flush();
|
|
rocprofiler_stop_context(get_client_ctx());
|
|
flush();
|
|
|
|
auto kernel_dispatch_output =
|
|
rocprofiler::tool::kernel_dispatch_buffered_output_ext_t{tool::get_config().kernel_trace};
|
|
|
|
auto hsa_output = tool::hsa_buffered_output_t{tool::get_config().hsa_core_api_trace ||
|
|
tool::get_config().hsa_amd_ext_api_trace ||
|
|
tool::get_config().hsa_image_ext_api_trace ||
|
|
tool::get_config().hsa_finalizer_ext_api_trace};
|
|
auto hip_output = tool::hip_buffered_output_t{tool::get_config().hip_runtime_api_trace ||
|
|
tool::get_config().hip_compiler_api_trace};
|
|
auto memory_copy_output =
|
|
tool::memory_copy_buffered_output_ext_t{tool::get_config().memory_copy_trace};
|
|
auto marker_output = tool::marker_buffered_output_t{tool::get_config().marker_api_trace};
|
|
auto counters_output =
|
|
tool::counter_collection_buffered_output_t{tool::get_config().counter_collection};
|
|
auto scratch_memory_output =
|
|
tool::scratch_memory_buffered_output_t{tool::get_config().scratch_memory_trace};
|
|
auto rccl_output = tool::rccl_buffered_output_t{tool::get_config().rccl_api_trace};
|
|
auto memory_allocation_output =
|
|
tool::memory_allocation_buffered_output_t{tool::get_config().memory_allocation_trace};
|
|
auto counters_records_output =
|
|
tool::counter_records_buffered_output_t{tool::get_config().counter_collection};
|
|
auto pc_sampling_host_trap_output =
|
|
tool::pc_sampling_host_trap_buffered_output_t{tool::get_config().pc_sampling_host_trap};
|
|
auto rocdecode_output =
|
|
tool::rocdecode_buffered_output_t{tool::get_config().rocdecode_api_trace};
|
|
auto rocjpeg_output = tool::rocjpeg_buffered_output_t{tool::get_config().rocjpeg_api_trace};
|
|
auto pc_sampling_stochastic_output =
|
|
tool::pc_sampling_stochastic_buffered_output_t{tool::get_config().pc_sampling_stochastic};
|
|
|
|
auto node_id_sort = [](const auto& lhs, const auto& rhs) { return lhs.node_id < rhs.node_id; };
|
|
auto agents_output = CHECK_NOTNULL(tool_metadata)->agents;
|
|
std::sort(agents_output.begin(), agents_output.end(), node_id_sort);
|
|
|
|
auto outdata = output_data{};
|
|
auto contributions = domain_stats_vec_t{};
|
|
auto cleanups = cleanup_vec_t{};
|
|
|
|
auto run_cleanup = [&cleanups]() {
|
|
for(const auto& itr : cleanups)
|
|
{
|
|
if(itr) itr();
|
|
}
|
|
cleanups.clear();
|
|
};
|
|
|
|
// generate the configuration output regardless of whether there is any data
|
|
if(tool::get_config().output_config_file)
|
|
{
|
|
generate_config_output(tool::get_config(), *tool_metadata);
|
|
}
|
|
|
|
auto _dtor = common::scope_destructor{run_cleanup};
|
|
|
|
generate_output(kernel_dispatch_output, outdata, contributions, cleanups);
|
|
generate_output(hsa_output, outdata, contributions, cleanups);
|
|
generate_output(hip_output, outdata, contributions, cleanups);
|
|
generate_output(memory_copy_output, outdata, contributions, cleanups);
|
|
generate_output(memory_allocation_output, outdata, contributions, cleanups);
|
|
generate_output(marker_output, outdata, contributions, cleanups);
|
|
generate_output(rccl_output, outdata, contributions, cleanups);
|
|
generate_output(counters_output, outdata, contributions, cleanups);
|
|
generate_output(scratch_memory_output, outdata, contributions, cleanups);
|
|
generate_output(rocdecode_output, outdata, contributions, cleanups);
|
|
generate_output(pc_sampling_host_trap_output, outdata, contributions, cleanups);
|
|
generate_output(rocjpeg_output, outdata, contributions, cleanups);
|
|
generate_output(pc_sampling_stochastic_output, outdata, contributions, cleanups);
|
|
|
|
if(tool::get_config().advanced_thread_trace && !tool_metadata->att_filenames.empty())
|
|
{
|
|
outdata.num_output += 1;
|
|
}
|
|
|
|
ROCP_INFO << fmt::format("Number of services generating output: {} ({} kB)",
|
|
outdata.num_output,
|
|
(outdata.num_bytes / 1024));
|
|
|
|
if(tool::get_config().csv_output && outdata.num_output > 0 &&
|
|
outdata.num_bytes >= tool::get_config().minimum_output_bytes)
|
|
{
|
|
tool::generate_csv(tool::get_config(), *tool_metadata, agents_output);
|
|
}
|
|
|
|
if(tool::get_config().stats && tool::get_config().csv_output && outdata.num_output > 0 &&
|
|
outdata.num_bytes >= tool::get_config().minimum_output_bytes)
|
|
{
|
|
tool::generate_csv(tool::get_config(), *tool_metadata, contributions);
|
|
}
|
|
|
|
if(tool::get_config().json_output && outdata.num_output > 0 &&
|
|
outdata.num_bytes >= tool::get_config().minimum_output_bytes)
|
|
{
|
|
auto json_ar = tool::open_json(tool::get_config());
|
|
|
|
json_ar.start_process();
|
|
tool::write_json(json_ar, tool::get_config(), *tool_metadata, getpid());
|
|
tool::write_json(json_ar,
|
|
tool::get_config(),
|
|
*tool_metadata,
|
|
contributions,
|
|
hip_output.get_generator(),
|
|
hsa_output.get_generator(),
|
|
kernel_dispatch_output.get_generator(),
|
|
memory_copy_output.get_generator(),
|
|
counters_output.get_generator(),
|
|
marker_output.get_generator(),
|
|
scratch_memory_output.get_generator(),
|
|
rccl_output.get_generator(),
|
|
memory_allocation_output.get_generator(),
|
|
rocdecode_output.get_generator(),
|
|
rocjpeg_output.get_generator(),
|
|
pc_sampling_host_trap_output.get_generator(),
|
|
pc_sampling_stochastic_output.get_generator());
|
|
json_ar.finish_process();
|
|
|
|
tool::close_json(json_ar);
|
|
}
|
|
|
|
if(tool::get_config().pftrace_output && outdata.num_output > 0 &&
|
|
outdata.num_bytes >= tool::get_config().minimum_output_bytes)
|
|
{
|
|
tool::write_perfetto(tool::get_config(),
|
|
*tool_metadata,
|
|
agents_output,
|
|
hip_output.get_generator(),
|
|
hsa_output.get_generator(),
|
|
kernel_dispatch_output.get_generator(),
|
|
memory_copy_output.get_generator(),
|
|
counters_output.get_generator(),
|
|
marker_output.get_generator(),
|
|
scratch_memory_output.get_generator(),
|
|
rccl_output.get_generator(),
|
|
memory_allocation_output.get_generator(),
|
|
rocdecode_output.get_generator(),
|
|
rocjpeg_output.get_generator());
|
|
}
|
|
|
|
if(tool::get_config().rocpd_output && outdata.num_output > 0 &&
|
|
outdata.num_bytes >= tool::get_config().minimum_output_bytes)
|
|
{
|
|
tool::write_rocpd(tool::get_config(),
|
|
*tool_metadata,
|
|
agents_output,
|
|
hip_output.get_generator(),
|
|
hsa_output.get_generator(),
|
|
kernel_dispatch_output.get_generator(),
|
|
memory_copy_output.get_generator(),
|
|
marker_output.get_generator(),
|
|
memory_allocation_output.get_generator(),
|
|
scratch_memory_output.get_generator(),
|
|
rccl_output.get_generator(),
|
|
rocdecode_output.get_generator(),
|
|
counters_output.get_generator());
|
|
}
|
|
|
|
if(tool::get_config().otf2_output && outdata.num_output > 0 &&
|
|
outdata.num_bytes >= tool::get_config().minimum_output_bytes)
|
|
{
|
|
auto hip_elem_data = hip_output.load_all();
|
|
auto hsa_elem_data = hsa_output.load_all();
|
|
auto kernel_dispatch_elem_data = kernel_dispatch_output.load_all();
|
|
auto memory_copy_elem_data = memory_copy_output.load_all();
|
|
auto marker_elem_data = marker_output.load_all();
|
|
auto scratch_memory_elem_data = scratch_memory_output.load_all();
|
|
auto rccl_elem_data = rccl_output.load_all();
|
|
auto memory_allocation_elem_data = memory_allocation_output.load_all();
|
|
auto rocdecode_elem_data = rocdecode_output.load_all();
|
|
auto rocjpeg_elem_data = rocjpeg_output.load_all();
|
|
|
|
tool::write_otf2(tool::get_config(),
|
|
*tool_metadata,
|
|
getpid(),
|
|
agents_output,
|
|
&hip_elem_data,
|
|
&hsa_elem_data,
|
|
&kernel_dispatch_elem_data,
|
|
&memory_copy_elem_data,
|
|
&marker_elem_data,
|
|
&scratch_memory_elem_data,
|
|
&rccl_elem_data,
|
|
&memory_allocation_elem_data,
|
|
&rocdecode_elem_data,
|
|
&rocjpeg_elem_data);
|
|
}
|
|
|
|
if(tool::get_config().summary_output && outdata.num_output > 0 &&
|
|
outdata.num_bytes >= tool::get_config().minimum_output_bytes)
|
|
{
|
|
tool::generate_stats(tool::get_config(), *tool_metadata, contributions);
|
|
}
|
|
|
|
if(tool::get_config().advanced_thread_trace)
|
|
{
|
|
auto decoder = rocprofiler::att_wrapper::ATTDecoder(tool::get_config().att_library_path);
|
|
ROCP_FATAL_IF(!decoder.valid()) << "Decoder library not found!";
|
|
|
|
auto codeobj = tool_metadata->get_code_object_load_info();
|
|
auto output_path = tool::format_path(tool::get_config().output_path);
|
|
|
|
std::vector<std::string> perf{};
|
|
for(auto& counter : tool::get_config().att_param_perfcounters)
|
|
{
|
|
std::stringstream ss;
|
|
ss << counter.counter_name;
|
|
|
|
if(counter.simd_mask != 0xF) ss << ':' << std::hex << counter.simd_mask;
|
|
|
|
perf.emplace_back(ss.str());
|
|
}
|
|
|
|
for(auto& [dispatch_id, att_filename_data] : tool_metadata->att_filenames)
|
|
{
|
|
std::string formats = "json,csv";
|
|
|
|
auto ui_name = std::stringstream{};
|
|
ui_name << fmt::format("ui_output_agent_{}_dispatch_{}",
|
|
std::to_string(att_filename_data.first.handle),
|
|
dispatch_id);
|
|
auto out_path = fmt::format("{}/{}", output_path, ui_name.str());
|
|
auto in_path = std::string(".");
|
|
|
|
decoder.parse(in_path, out_path, att_filename_data.second, codeobj, perf, formats);
|
|
}
|
|
}
|
|
|
|
run_cleanup();
|
|
|
|
if(destructors)
|
|
{
|
|
for(const auto& itr : *destructors)
|
|
itr();
|
|
delete destructors;
|
|
destructors = nullptr;
|
|
}
|
|
|
|
#if defined(CODECOV) && CODECOV > 0
|
|
__gcov_dump();
|
|
#endif
|
|
}
|
|
|
|
std::vector<rocprofiler_counter_record_dimension_info_t>
|
|
get_tool_counter_dimension_info()
|
|
{
|
|
auto _data = get_agent_counter_info();
|
|
auto _ret = std::vector<rocprofiler_counter_record_dimension_info_t>{};
|
|
for(const auto& itr : _data)
|
|
{
|
|
for(const auto& iitr : itr.second)
|
|
for(const auto& ditr : iitr.dimensions)
|
|
_ret.emplace_back(ditr);
|
|
}
|
|
|
|
auto _sorter = [](const rocprofiler_counter_record_dimension_info_t& lhs,
|
|
const rocprofiler_counter_record_dimension_info_t& rhs) {
|
|
return std::tie(lhs.id, lhs.instance_size) < std::tie(rhs.id, rhs.instance_size);
|
|
};
|
|
auto _equiv = [](const rocprofiler_counter_record_dimension_info_t& lhs,
|
|
const rocprofiler_counter_record_dimension_info_t& rhs) {
|
|
return std::tie(lhs.id, lhs.instance_size) == std::tie(rhs.id, rhs.instance_size);
|
|
};
|
|
|
|
std::sort(_ret.begin(), _ret.end(), _sorter);
|
|
_ret.erase(std::unique(_ret.begin(), _ret.end(), _equiv), _ret.end());
|
|
|
|
return _ret;
|
|
}
|
|
|
|
namespace
|
|
{
|
|
using main_func_t = int (*)(int, char**, char**);
|
|
|
|
main_func_t&
|
|
get_main_function()
|
|
{
|
|
static main_func_t user_main = nullptr;
|
|
return user_main;
|
|
}
|
|
|
|
signal_func_t&
|
|
get_signal_function()
|
|
{
|
|
static signal_func_t user_signal = nullptr;
|
|
return user_signal;
|
|
}
|
|
|
|
sigaction_func_t&
|
|
get_sigaction_function()
|
|
{
|
|
static sigaction_func_t user_sigaction = (sigaction_func_t) dlsym(RTLD_NEXT, "sigaction");
|
|
return user_sigaction;
|
|
}
|
|
|
|
bool signal_handler_exit =
|
|
rocprofiler::tool::get_env("ROCPROF_INTERNAL_TEST_SIGNAL_HANDLER_VIA_EXIT", false);
|
|
} // namespace
|
|
|
|
// Applies the `visibility("internal")` attribute to the declaration it follows.
// NOTE(review): the expansion already ends with ';', so `decl ROCPROFV3_INTERNAL_API;`
// produces an extra empty declaration -- confirm all uses before removing it.
#define ROCPROFV3_INTERNAL_API __attribute__((visibility("internal")));
|
|
|
|
std::optional<int>
|
|
wait_pid(pid_t _pid, int _opts = 0)
|
|
{
|
|
auto this_pid = getpid();
|
|
auto this_ppid = getppid();
|
|
auto this_tid = common::get_tid();
|
|
auto this_func = std::string_view{__FUNCTION__};
|
|
|
|
ROCP_INFO << fmt::format("[PPID={}][PID={}][TID={}][{}] rocprofv3 waiting for child {}",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
_pid);
|
|
|
|
int _status = 0;
|
|
pid_t _pid_v = -1;
|
|
_opts |= WUNTRACED;
|
|
do
|
|
{
|
|
if((_opts & WNOHANG) > 0)
|
|
{
|
|
std::this_thread::yield();
|
|
std::this_thread::sleep_for(std::chrono::milliseconds{100});
|
|
}
|
|
_pid_v = waitpid(_pid, &_status, _opts);
|
|
} while(_pid_v == 0);
|
|
|
|
if(_pid_v < 0) return std::nullopt;
|
|
return _status;
|
|
}
|
|
|
|
extern "C" {
|
|
void
|
|
rocprofv3_set_main(main_func_t main_func) ROCPROFV3_INTERNAL_API;
|
|
|
|
int
|
|
diagnose_status(pid_t _pid, int _status)
|
|
{
|
|
auto this_pid = getpid();
|
|
auto this_ppid = getppid();
|
|
auto this_tid = common::get_tid();
|
|
auto this_func = std::string_view{__FUNCTION__};
|
|
|
|
bool _normal_exit = (WIFEXITED(_status) > 0);
|
|
bool _unhandled_signal = (WIFSIGNALED(_status) > 0);
|
|
bool _core_dump = (WCOREDUMP(_status) > 0);
|
|
bool _stopped = (WIFSTOPPED(_status) > 0);
|
|
int _exit_status = WEXITSTATUS(_status);
|
|
int _stop_signal = (_stopped) ? WSTOPSIG(_status) : 0;
|
|
int _ec = (_unhandled_signal) ? WTERMSIG(_status) : 0;
|
|
|
|
ROCP_TRACE << fmt::format("[PPID={}][PID={}][TID={}][{}] diagnosing status for process {} :: "
|
|
"status: {}, normal exit: {}, unhandled signal: {}, core dump: {}, "
|
|
"stopped: {}, exit status: {}, stop signal: {}, exit code: {}",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
_pid,
|
|
_status,
|
|
std::to_string(static_cast<int>(_normal_exit)),
|
|
std::to_string(static_cast<int>(_unhandled_signal)),
|
|
std::to_string(static_cast<int>(_core_dump)),
|
|
std::to_string(static_cast<int>(_stopped)),
|
|
_exit_status,
|
|
_stop_signal,
|
|
_ec);
|
|
|
|
if(!_normal_exit)
|
|
{
|
|
if(_ec == 0) _ec = EXIT_FAILURE;
|
|
ROCP_INFO << fmt::format(
|
|
"[PPID={}][PID={}][TID={}][{}] process {} terminated abnormally. exit code: {}",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
_pid,
|
|
_ec);
|
|
}
|
|
|
|
if(_stopped)
|
|
{
|
|
ROCP_INFO << fmt::format(
|
|
"[PPID={}][PID={}][TID={}][{}] process {} stopped with signal {}. exit code: {}",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
_pid,
|
|
_stop_signal,
|
|
_ec);
|
|
}
|
|
|
|
if(_core_dump)
|
|
{
|
|
ROCP_INFO << fmt::format("[PPID={}][PID={}][TID={}][{}] process {} terminated and "
|
|
"produced a core dump. exit code: {}",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
_pid,
|
|
_ec);
|
|
}
|
|
|
|
if(_unhandled_signal)
|
|
{
|
|
ROCP_INFO << fmt::format(
|
|
"[PPID={}][PID={}][TID={}][{}] process {} terminated because it received a signal "
|
|
"({}) that was not handled. exit code: {}",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
_pid,
|
|
_ec,
|
|
_ec);
|
|
}
|
|
|
|
if(!_normal_exit && _exit_status > 0)
|
|
{
|
|
if(_exit_status == 127)
|
|
{
|
|
ROCP_INFO << fmt::format(
|
|
"[PPID={}][PID={}][TID={}][{}] execv in process {} failed. exit code: {}",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
_pid,
|
|
_ec);
|
|
}
|
|
else
|
|
{
|
|
ROCP_INFO << fmt::format("[PPID={}][PID={}][TID={}][{}] process {} terminated with "
|
|
"a non-zero status. exit code: {}",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
_pid,
|
|
_ec);
|
|
}
|
|
}
|
|
|
|
return _ec;
|
|
}
|
|
|
|
void
|
|
rocprofv3_error_signal_handler(int signo, siginfo_t* info, void* ucontext)
|
|
{
|
|
auto this_pid = getpid();
|
|
auto this_ppid = getppid();
|
|
auto this_tid = common::get_tid();
|
|
auto this_func = std::string_view{__FUNCTION__};
|
|
|
|
ROCP_WARNING << fmt::format("[PPID={}][PID={}][TID={}][{}] rocprofv3 caught signal {}...",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
signo);
|
|
|
|
static auto _once = std::once_flag{};
|
|
std::call_once(_once, [&]() {
|
|
auto get_children = [&this_pid]() {
|
|
auto fname = fmt::format("/proc/{}/task/{}/children", this_pid, this_pid);
|
|
auto ifs = std::ifstream{fname};
|
|
auto children = std::vector<pid_t>{};
|
|
while(ifs)
|
|
{
|
|
pid_t val = 0;
|
|
ifs >> val;
|
|
if(ifs && !ifs.eof() && val > 0) children.emplace_back(val);
|
|
}
|
|
return children;
|
|
};
|
|
|
|
auto _children = get_children();
|
|
ROCP_WARNING << fmt::format(
|
|
"[PPID={}][PID={}][TID={}][{}] rocprofv3 will wait for {} children to exit",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
_children.size());
|
|
|
|
// wait for children
|
|
for(auto itr : _children)
|
|
{
|
|
auto status = wait_pid(itr, WUNTRACED | WNOHANG);
|
|
if(status) diagnose_status(itr, status.value());
|
|
}
|
|
|
|
ROCP_WARNING << fmt::format(
|
|
"[PPID={}][PID={}][TID={}][{}] rocprofv3 finalizing after signal {}...",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
signo);
|
|
|
|
finalize_rocprofv3(this_func);
|
|
|
|
ROCP_INFO << fmt::format(
|
|
"[PPID={}][PID={}][TID={}][{}] rocprofv3 finalizing after signal {}... complete",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
signo);
|
|
|
|
if(get_chained_signals().at(signo))
|
|
{
|
|
ROCP_INFO << fmt::format(
|
|
"[PPID={}][PID={}][TID={}][{}] rocprofv3 found chained signal handler for {}",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
signo);
|
|
|
|
if(auto& _chained = *get_chained_signals().at(signo); _chained.action)
|
|
{
|
|
ROCP_TRACE << fmt::format("[PPID={}][PID={}][TID={}][{}] rocprofv3 found chained "
|
|
"signal handler for {}... executing chained sigaction",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
signo);
|
|
if((_chained.action->sa_flags & SA_SIGINFO) == SA_SIGINFO &&
|
|
_chained.action->sa_sigaction &&
|
|
_chained.action->sa_sigaction != &rocprofv3_error_signal_handler)
|
|
{
|
|
ROCP_WARNING << fmt::format(
|
|
"[PPID={}][PID={}][TID={}][{}] rocprofv3 found chained signal handler for "
|
|
"{}... executing chained sigaction (SIGINFO)",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
signo);
|
|
_chained.action->sa_sigaction(signo, info, ucontext);
|
|
}
|
|
else if((_chained.action->sa_flags & SA_SIGINFO) != SA_SIGINFO &&
|
|
_chained.action->sa_handler &&
|
|
_chained.action->sa_sigaction != &rocprofv3_error_signal_handler)
|
|
{
|
|
ROCP_WARNING << fmt::format(
|
|
"[PPID={}][PID={}][TID={}][{}] rocprofv3 found chained signal handler for "
|
|
"{}... executing chained sigaction (HANDLER)",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
signo);
|
|
_chained.action->sa_handler(signo);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if(_chained.handler)
|
|
{
|
|
ROCP_WARNING << fmt::format(
|
|
"[PPID={}][PID={}][TID={}][{}] rocprofv3 found chained signal handler for "
|
|
"{}... executing chained handler",
|
|
this_ppid,
|
|
this_pid,
|
|
this_tid,
|
|
this_func,
|
|
signo);
|
|
_chained.handler(signo);
|
|
}
|
|
}
|
|
}
|
|
});
|
|
|
|
// below is for testing purposes. re-raising the signal causes CTest to ignore WILL_FAIL ON
|
|
if(signal_handler_exit) ::quick_exit(signo);
|
|
::raise(signo);
|
|
}
|
|
|
|
int
|
|
rocprofv3_main(int argc, char** argv, char** envp) ROCPROFV3_INTERNAL_API;
|
|
|
|
sighandler_t
|
|
rocprofv3_signal(int signum, sighandler_t handler) ROCPROFV3_INTERNAL_API;
|
|
|
|
int
|
|
rocprofv3_sigaction(int signum,
|
|
const struct sigaction* __restrict__ act,
|
|
struct sigaction* __restrict__ oldact) ROCPROFV3_INTERNAL_API;
|
|
|
|
// Tool configuration entry point.
//
// Names the client "rocprofv3", remembers the client id, registers cleanup
// (destructors plus an atexit finalizer), installs the temporary-file-name
// callback, optionally loads extra counter definitions, and registers
// api_timestamps_callback for the intercept tables selected by the tool
// configuration.
//
// Parameters:
//   version         - encoded as major * 10000 + minor * 100 + patch
//   runtime_version - version string, used for logging only
//   priority        - a value > 0 triggers a "not primary tool" warning
//   id              - client identifier; its `name` field is written here
//
// Returns a pointer to a function-local static configure result referencing
// tool_init and tool_fini.
rocprofiler_tool_configure_result_t*
rocprofiler_configure(uint32_t                 version,
                      const char*              runtime_version,
                      uint32_t                 priority,
                      rocprofiler_client_id_t* id)
{
    initialize_logging();

    // name the client and keep its handle
    id->name          = "rocprofv3";
    client_identifier = id;

    // note that rocprofv3 is not the primary tool
    ROCP_WARNING_IF(priority > 0) << id->name << " has a priority of " << priority
                                  << " (not primary tool)";

    // ensure these pointers are not leaked
    add_destructor(tool_metadata);
    add_destructor(execution_profile);

    // in case main wrapper is not used
    ::atexit([]() { finalize_rocprofv3("atexit"); });

    tool::get_tmp_file_name_callback() = [](domain_type type) -> std::string {
        return compose_tmp_file_name(tool::get_config(), type);
    };

    // user-supplied counter definitions are appended to the existing ones
    if(const auto& _extra = tool::get_config().extra_counters_contents; !_extra.empty())
    {
        std::string _definitions(_extra);
        ROCPROFILER_CALL(
            rocprofiler_load_counter_definition(_definitions.c_str(),
                                                _definitions.size(),
                                                ROCPROFILER_COUNTER_FLAG_APPEND_DEFINITION),
            "Loading extra counters");
    }

    // the HSA table is always requested; the others depend on the enabled traces
    const auto& _config = tool::get_config();
    int         _tables = ROCPROFILER_HSA_TABLE;
    if(_config.hip_compiler_api_trace) _tables |= ROCPROFILER_HIP_COMPILER_TABLE;
    if(_config.hip_runtime_api_trace) _tables |= ROCPROFILER_HIP_RUNTIME_TABLE;
    if(_config.rccl_api_trace) _tables |= ROCPROFILER_RCCL_TABLE;
    if(_config.marker_api_trace) _tables |= ROCPROFILER_MARKER_CORE_TABLE;
    if(_config.rocdecode_api_trace) _tables |= ROCPROFILER_ROCDECODE_TABLE;
    if(_config.rocjpeg_api_trace) _tables |= ROCPROFILER_ROCJPEG_TABLE;

    ROCPROFILER_CALL(
        rocprofiler_at_intercept_table_registration(api_timestamps_callback, _tables, nullptr),
        "api registration");

    // compute major/minor/patch version info
    const uint32_t _major = version / 10000;
    const uint32_t _minor = (version % 10000) / 100;
    const uint32_t _patch = version % 100;

    ROCP_INFO << id->name << " is using rocprofiler-sdk v" << _major << "." << _minor << "."
              << _patch << " (" << runtime_version << ")";

    // create configure data
    // (the trailing nullptr is presumably the data pointer passed around all the
    // callbacks -- TODO confirm against the struct definition)
    static auto _result = rocprofiler_tool_configure_result_t{
        sizeof(rocprofiler_tool_configure_result_t), &tool_init, &tool_fini, nullptr};

    // return pointer to configure data
    return &_result;
}
|
|
|
|
void
|
|
rocprofv3_set_main(main_func_t main_func)
|
|
{
|
|
get_main_function() = main_func;
|
|
}
|
|
|
|
// Logs the name of the calling function (via __FUNCTION__) through ROCP_INFO,
// prefixed with the parent PID, PID and thread id. MSG must be a string literal:
// it is concatenated onto the prefix format, so any `{}` placeholders it contains
// are filled from the variadic arguments. At least one variadic argument is
// required because __VA_ARGS__ follows a comma.
#define LOG_FUNCTION_ENTRY(MSG, ...)                                                               \
    {                                                                                              \
        ROCP_INFO << fmt::format("[PPID={}][PID={}][TID={}][rocprofv3] {}" MSG,                    \
                                 getppid(),                                                        \
                                 getpid(),                                                         \
                                 rocprofiler::common::get_tid(),                                   \
                                 __FUNCTION__,                                                     \
                                 __VA_ARGS__);                                                     \
    }
|
|
|
|
sighandler_t
|
|
rocprofv3_signal(int signum, sighandler_t handler)
|
|
{
|
|
static auto _once = std::once_flag{};
|
|
std::call_once(_once,
|
|
[]() { get_signal_function() = (signal_func_t) dlsym(RTLD_NEXT, "signal"); });
|
|
|
|
if(!is_handled_signal(signum) || !tool::get_config().enable_signal_handlers)
|
|
return CHECK_NOTNULL(get_signal_function())(signum, handler);
|
|
|
|
get_chained_signals().at(signum) = chained_siginfo{signum, handler, std::nullopt};
|
|
|
|
return get_signal_function()(
|
|
signum, [](int signum_v) { rocprofv3_error_signal_handler(signum_v, nullptr, nullptr); });
|
|
}
|
|
|
|
int
|
|
rocprofv3_sigaction(int signum,
|
|
const struct sigaction* __restrict__ act,
|
|
struct sigaction* __restrict__ oldact)
|
|
{
|
|
static auto _once = std::once_flag{};
|
|
std::call_once(_once, []() {
|
|
get_sigaction_function() = (sigaction_func_t) dlsym(RTLD_NEXT, "sigaction");
|
|
});
|
|
|
|
if(!is_handled_signal(signum) || !act || !tool::get_config().enable_signal_handlers)
|
|
return CHECK_NOTNULL(get_sigaction_function())(signum, act, oldact);
|
|
|
|
// make sure rocprofv3_error_signal_handler doesn't call itself
|
|
if((act->sa_flags & SA_SIGINFO) == SA_SIGINFO &&
|
|
act->sa_sigaction != &rocprofv3_error_signal_handler)
|
|
get_chained_signals().at(signum) = chained_siginfo{signum, nullptr, *act};
|
|
|
|
struct sigaction _upd_act = *act;
|
|
_upd_act.sa_flags |= (SA_SIGINFO | SA_RESETHAND | SA_NOCLDSTOP);
|
|
_upd_act.sa_sigaction = &rocprofv3_error_signal_handler;
|
|
|
|
return get_sigaction_function()(signum, &_upd_act, oldact);
|
|
}
|
|
|
|
int
|
|
rocprofv3_main(int argc, char** argv, char** envp)
|
|
{
|
|
auto convert_to_vec = [](char** inp) {
|
|
auto _data = std::vector<std::string_view>{};
|
|
size_t n = 0;
|
|
const char* p = nullptr;
|
|
if(!inp) return _data;
|
|
do
|
|
{
|
|
p = inp[n++];
|
|
if(p != nullptr) _data.emplace_back(p);
|
|
} while(p != nullptr);
|
|
return _data;
|
|
};
|
|
|
|
auto _argv = convert_to_vec(argv);
|
|
// auto _envp = convect_to_vec(envp);
|
|
|
|
LOG_FUNCTION_ENTRY("({}, '{}', ...)", argc, fmt::join(_argv.begin(), _argv.end(), " "));
|
|
|
|
initialize_logging();
|
|
|
|
initialize_rocprofv3();
|
|
|
|
initialize_signal_handler(get_sigaction_function());
|
|
|
|
ROCP_INFO << "rocprofv3: main function wrapper will be invoked...";
|
|
|
|
auto _main_timer = std::optional<common::simple_timer>{};
|
|
|
|
// should never happen but if it does, don't time
|
|
if(!_argv.empty())
|
|
_main_timer = common::simple_timer{
|
|
fmt::format("[rocprofv3] '{}'", fmt::join(_argv.begin(), _argv.end(), " "))};
|
|
|
|
if(tool_metadata && tool_metadata->process_start_ns == 0)
|
|
rocprofiler_get_timestamp(&(tool_metadata->process_start_ns));
|
|
|
|
auto ret = CHECK_NOTNULL(get_main_function())(argc, argv, envp);
|
|
|
|
if(tool_metadata && tool_metadata->process_end_ns == 0)
|
|
rocprofiler_get_timestamp(&(tool_metadata->process_end_ns));
|
|
|
|
ROCP_INFO << "rocprofv3: main function has returned with exit code: " << ret;
|
|
|
|
// reset so that it reports the timing
|
|
if(_main_timer) _main_timer.reset();
|
|
|
|
finalize_rocprofv3(__FUNCTION__);
|
|
|
|
ROCP_INFO << "rocprofv3 finished. exit code: " << ret;
|
|
return ret;
|
|
}
|
|
}
|