2024-11-07 01:15:19 -06:00
|
|
|
// MIT License
|
|
|
|
|
//
|
2025-01-23 06:41:20 +05:30
|
|
|
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
2024-11-07 01:15:19 -06:00
|
|
|
//
|
|
|
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
|
|
|
// in the Software without restriction, including without limitation the rights
|
|
|
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
|
|
|
// furnished to do so, subject to the following conditions:
|
|
|
|
|
//
|
|
|
|
|
// The above copyright notice and this permission notice shall be included in all
|
|
|
|
|
// copies or substantial portions of the Software.
|
|
|
|
|
//
|
|
|
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
|
|
|
// SOFTWARE.
|
|
|
|
|
|
|
|
|
|
#include "generatePerfetto.hpp"
|
|
|
|
|
#include "output_stream.hpp"
|
|
|
|
|
#include "timestamps.hpp"
|
|
|
|
|
|
|
|
|
|
#include "lib/common/utility.hpp"
|
|
|
|
|
|
|
|
|
|
#include <rocprofiler-sdk/fwd.h>
|
|
|
|
|
#include <rocprofiler-sdk/marker/api_id.h>
|
2025-10-27 07:58:20 -07:00
|
|
|
#include <rocprofiler-sdk/cxx/constants.hpp>
|
2024-11-07 01:15:19 -06:00
|
|
|
#include <rocprofiler-sdk/cxx/hash.hpp>
|
|
|
|
|
#include <rocprofiler-sdk/cxx/operators.hpp>
|
|
|
|
|
#include <rocprofiler-sdk/cxx/perfetto.hpp>
|
|
|
|
|
|
2025-03-13 22:40:03 -05:00
|
|
|
#include <fmt/core.h>
|
|
|
|
|
|
2024-11-07 01:15:19 -06:00
|
|
|
#include <atomic>
|
|
|
|
|
#include <future>
|
2025-03-13 22:40:03 -05:00
|
|
|
#include <iostream>
|
2024-11-07 01:15:19 -06:00
|
|
|
#include <map>
|
2026-01-27 10:27:54 -08:00
|
|
|
#include <memory>
|
2024-11-07 01:15:19 -06:00
|
|
|
#include <thread>
|
|
|
|
|
#include <unordered_map>
|
|
|
|
|
#include <utility>
|
|
|
|
|
|
|
|
|
|
namespace rocprofiler
|
|
|
|
|
{
|
|
|
|
|
namespace tool
|
|
|
|
|
{
|
|
|
|
|
namespace
|
|
|
|
|
{
|
|
|
|
|
auto main_tid = common::get_tid();
|
|
|
|
|
|
|
|
|
|
template <typename Tp>
|
|
|
|
|
size_t
|
|
|
|
|
get_hash_id(Tp&& _val)
|
|
|
|
|
{
|
|
|
|
|
if constexpr(!std::is_pointer<Tp>::value)
|
|
|
|
|
return std::hash<Tp>{}(std::forward<Tp>(_val));
|
|
|
|
|
else if constexpr(std::is_same<Tp, const char*>::value)
|
|
|
|
|
return get_hash_id(std::string_view{_val});
|
|
|
|
|
else
|
|
|
|
|
return get_hash_id(*_val);
|
|
|
|
|
}
|
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
write_perfetto(
|
2025-07-09 21:05:45 +05:30
|
|
|
const output_config& ocfg,
|
|
|
|
|
const metadata& tool_metadata,
|
|
|
|
|
std::vector<agent_info> agent_data,
|
|
|
|
|
const generator<tool_buffer_tracing_hip_api_ext_record_t>& hip_api_gen,
|
|
|
|
|
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
|
|
|
|
|
const generator<tool_buffer_tracing_kernel_dispatch_ext_record_t>& kernel_dispatch_gen,
|
|
|
|
|
const generator<tool_buffer_tracing_memory_copy_ext_record_t>& memory_copy_gen,
|
|
|
|
|
const generator<tool_counter_record_t>& counter_collection_gen,
|
|
|
|
|
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
|
|
|
|
|
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& scratch_memory_gen,
|
2024-11-18 20:22:14 -06:00
|
|
|
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& rccl_api_gen,
|
2025-05-18 20:11:26 -05:00
|
|
|
const generator<tool_buffer_tracing_memory_allocation_ext_record_t>& memory_allocation_gen,
|
2025-04-11 16:56:36 -05:00
|
|
|
const generator<rocprofiler_buffer_tracing_rocdecode_api_ext_record_t>& rocdecode_api_gen,
|
2025-02-21 15:43:49 -06:00
|
|
|
const generator<rocprofiler_buffer_tracing_rocjpeg_api_record_t>& rocjpeg_api_gen)
|
2024-11-07 01:15:19 -06:00
|
|
|
{
|
|
|
|
|
namespace sdk = ::rocprofiler::sdk;
|
|
|
|
|
|
|
|
|
|
// auto root_process_track = ::perfetto::Track{};
|
|
|
|
|
// uint64_t process_uuid = tool_metadata.process_start_ns ^ tool_metadata.process_id;
|
|
|
|
|
// auto process_track = ::perfetto::Track{process_uuid, root_process_track};
|
|
|
|
|
// auto process_track = ::perfetto::ProcessTrack::Current();
|
|
|
|
|
|
|
|
|
|
auto agents_map = std::unordered_map<rocprofiler_agent_id_t, rocprofiler_agent_t>{};
|
|
|
|
|
for(auto itr : agent_data)
|
|
|
|
|
agents_map.emplace(itr.id, itr);
|
|
|
|
|
|
|
|
|
|
auto args = ::perfetto::TracingInitArgs{};
|
|
|
|
|
auto track_event_cfg = ::perfetto::protos::gen::TrackEventConfig{};
|
|
|
|
|
auto cfg = ::perfetto::TraceConfig{};
|
|
|
|
|
|
|
|
|
|
// environment settings
|
|
|
|
|
auto shmem_size_hint = ocfg.perfetto_shmem_size_hint;
|
|
|
|
|
auto buffer_size_kb = ocfg.perfetto_buffer_size;
|
|
|
|
|
|
|
|
|
|
auto* buffer_config = cfg.add_buffers();
|
|
|
|
|
buffer_config->set_size_kb(buffer_size_kb);
|
|
|
|
|
|
|
|
|
|
if(ocfg.perfetto_buffer_fill_policy == "discard" || ocfg.perfetto_buffer_fill_policy.empty())
|
|
|
|
|
buffer_config->set_fill_policy(
|
|
|
|
|
::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD);
|
|
|
|
|
else if(ocfg.perfetto_buffer_fill_policy == "ring_buffer")
|
|
|
|
|
buffer_config->set_fill_policy(
|
|
|
|
|
::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_RING_BUFFER);
|
|
|
|
|
else
|
|
|
|
|
ROCP_FATAL << "Unsupport perfetto buffer fill policy: '" << ocfg.perfetto_buffer_fill_policy
|
|
|
|
|
<< "'. Supported: discard, ring_buffer";
|
|
|
|
|
|
|
|
|
|
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
|
|
|
|
|
ds_cfg->set_name("track_event"); // this MUST be track_event
|
|
|
|
|
ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
|
|
|
|
|
|
|
|
|
|
args.shmem_size_hint_kb = shmem_size_hint;
|
|
|
|
|
|
|
|
|
|
if(ocfg.perfetto_backend == "inprocess" || ocfg.perfetto_backend.empty())
|
|
|
|
|
args.backends |= ::perfetto::kInProcessBackend;
|
|
|
|
|
else if(ocfg.perfetto_backend == "system")
|
|
|
|
|
args.backends |= ::perfetto::kSystemBackend;
|
|
|
|
|
else
|
|
|
|
|
ROCP_FATAL << "Unsupport perfetto backend: '" << ocfg.perfetto_backend
|
|
|
|
|
<< "'. Supported: inprocess, system";
|
|
|
|
|
|
|
|
|
|
::perfetto::Tracing::Initialize(args);
|
|
|
|
|
::perfetto::TrackEvent::Register();
|
|
|
|
|
|
|
|
|
|
auto tracing_session = ::perfetto::Tracing::NewTrace();
|
|
|
|
|
|
|
|
|
|
tracing_session->Setup(cfg);
|
|
|
|
|
tracing_session->StartBlocking();
|
2025-05-01 00:56:15 -05:00
|
|
|
const auto is_hip_initialized =
|
|
|
|
|
tool_metadata.is_runtime_initialized(ROCPROFILER_RUNTIME_INITIALIZATION_HIP);
|
|
|
|
|
const auto group_by_queue = ocfg.group_by_queue || !is_hip_initialized;
|
|
|
|
|
auto tids = std::set<rocprofiler_thread_id_t>{};
|
|
|
|
|
auto demangled = std::unordered_map<std::string_view, std::string>{};
|
|
|
|
|
auto agent_thread_ids = std::unordered_map<rocprofiler_agent_id_t, std::set<uint64_t>>{};
|
2024-11-18 20:22:14 -06:00
|
|
|
auto agent_thread_ids_alloc = std::unordered_map<rocprofiler_agent_id_t, std::set<uint64_t>>{};
|
2024-11-07 01:15:19 -06:00
|
|
|
auto agent_queue_ids =
|
|
|
|
|
std::unordered_map<rocprofiler_agent_id_t, std::unordered_set<rocprofiler_queue_id_t>>{};
|
2025-06-26 14:22:50 -05:00
|
|
|
auto agent_stream_ids = std::unordered_set<rocprofiler_stream_id_t>{};
|
|
|
|
|
auto thread_indexes = std::unordered_map<rocprofiler_thread_id_t, uint64_t>{};
|
2024-11-07 01:15:19 -06:00
|
|
|
|
|
|
|
|
auto thread_tracks = std::unordered_map<rocprofiler_thread_id_t, ::perfetto::Track>{};
|
|
|
|
|
auto agent_thread_tracks =
|
|
|
|
|
std::unordered_map<rocprofiler_agent_id_t,
|
|
|
|
|
std::unordered_map<uint64_t, ::perfetto::Track>>{};
|
2024-11-18 20:22:14 -06:00
|
|
|
auto agent_thread_tracks_alloc =
|
|
|
|
|
std::unordered_map<rocprofiler_agent_id_t,
|
|
|
|
|
std::unordered_map<uint64_t, ::perfetto::Track>>{};
|
2024-11-07 01:15:19 -06:00
|
|
|
auto agent_queue_tracks =
|
|
|
|
|
std::unordered_map<rocprofiler_agent_id_t,
|
|
|
|
|
std::unordered_map<rocprofiler_queue_id_t, ::perfetto::Track>>{};
|
2025-05-01 00:56:15 -05:00
|
|
|
auto stream_tracks = std::unordered_map<rocprofiler_stream_id_t, ::perfetto::Track>{};
|
2024-11-07 01:15:19 -06:00
|
|
|
|
|
|
|
|
auto _get_agent = [&agent_data](rocprofiler_agent_id_t _id) -> const rocprofiler_agent_t* {
|
|
|
|
|
for(const auto& itr : agent_data)
|
|
|
|
|
{
|
|
|
|
|
if(_id == itr.id) return &itr;
|
|
|
|
|
}
|
|
|
|
|
return CHECK_NOTNULL(nullptr);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
for(auto ditr : hsa_api_gen)
|
|
|
|
|
for(auto itr : hsa_api_gen.get(ditr))
|
|
|
|
|
tids.emplace(itr.thread_id);
|
|
|
|
|
for(auto ditr : hip_api_gen)
|
|
|
|
|
for(auto itr : hip_api_gen.get(ditr))
|
|
|
|
|
tids.emplace(itr.thread_id);
|
|
|
|
|
for(auto ditr : marker_api_gen)
|
|
|
|
|
for(auto itr : marker_api_gen.get(ditr))
|
|
|
|
|
tids.emplace(itr.thread_id);
|
|
|
|
|
for(auto ditr : rccl_api_gen)
|
|
|
|
|
for(auto itr : rccl_api_gen.get(ditr))
|
|
|
|
|
tids.emplace(itr.thread_id);
|
2025-01-17 16:42:25 -06:00
|
|
|
for(auto ditr : rocdecode_api_gen)
|
|
|
|
|
for(auto itr : rocdecode_api_gen.get(ditr))
|
|
|
|
|
tids.emplace(itr.thread_id);
|
2025-02-21 15:43:49 -06:00
|
|
|
for(auto ditr : rocjpeg_api_gen)
|
|
|
|
|
for(auto itr : rocjpeg_api_gen.get(ditr))
|
|
|
|
|
tids.emplace(itr.thread_id);
|
2024-11-07 01:15:19 -06:00
|
|
|
|
|
|
|
|
for(auto ditr : memory_copy_gen)
|
|
|
|
|
for(auto itr : memory_copy_gen.get(ditr))
|
|
|
|
|
{
|
|
|
|
|
tids.emplace(itr.thread_id);
|
2025-06-26 14:22:50 -05:00
|
|
|
agent_stream_ids.emplace(itr.stream_id);
|
2025-05-01 00:56:15 -05:00
|
|
|
if(group_by_queue)
|
2025-03-14 04:45:13 -05:00
|
|
|
{
|
|
|
|
|
agent_thread_ids[itr.dst_agent_id].emplace(itr.thread_id);
|
|
|
|
|
}
|
2024-11-07 01:15:19 -06:00
|
|
|
}
|
|
|
|
|
|
2024-11-18 20:22:14 -06:00
|
|
|
for(auto ditr : memory_allocation_gen)
|
|
|
|
|
for(auto itr : memory_allocation_gen.get(ditr))
|
|
|
|
|
{
|
|
|
|
|
tids.emplace(itr.thread_id);
|
|
|
|
|
agent_thread_ids_alloc[itr.agent_id].emplace(itr.thread_id);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-07 01:15:19 -06:00
|
|
|
for(auto ditr : kernel_dispatch_gen)
|
|
|
|
|
for(auto itr : kernel_dispatch_gen.get(ditr))
|
|
|
|
|
{
|
|
|
|
|
tids.emplace(itr.thread_id);
|
2025-06-26 14:22:50 -05:00
|
|
|
agent_stream_ids.emplace(itr.stream_id);
|
2025-05-01 00:56:15 -05:00
|
|
|
if(group_by_queue)
|
2025-03-14 04:45:13 -05:00
|
|
|
{
|
|
|
|
|
agent_queue_ids[itr.dispatch_info.agent_id].emplace(itr.dispatch_info.queue_id);
|
|
|
|
|
}
|
2024-11-07 01:15:19 -06:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint64_t nthrn = 0;
|
|
|
|
|
for(auto itr : tids)
|
|
|
|
|
{
|
|
|
|
|
if(itr == main_tid)
|
|
|
|
|
{
|
|
|
|
|
thread_indexes.emplace(main_tid, 0);
|
|
|
|
|
thread_tracks.emplace(main_tid, ::perfetto::ThreadTrack::Current());
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
auto _idx = ++nthrn;
|
|
|
|
|
thread_indexes.emplace(itr, _idx);
|
|
|
|
|
auto _track = ::perfetto::Track{itr};
|
|
|
|
|
auto _desc = _track.Serialize();
|
|
|
|
|
auto _namess = std::stringstream{};
|
|
|
|
|
_namess << "THREAD " << _idx << " (" << itr << ")";
|
|
|
|
|
_desc.set_name(_namess.str());
|
|
|
|
|
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
|
|
|
|
|
|
|
|
|
thread_tracks.emplace(itr, _track);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for(const auto& itr : agent_thread_ids)
|
|
|
|
|
{
|
|
|
|
|
const auto* _agent = _get_agent(itr.first);
|
|
|
|
|
|
|
|
|
|
for(auto titr : itr.second)
|
|
|
|
|
{
|
|
|
|
|
auto _namess = std::stringstream{};
|
|
|
|
|
_namess << "COPY to AGENT [" << _agent->logical_node_id << "] THREAD ["
|
|
|
|
|
<< thread_indexes.at(titr) << "] ";
|
|
|
|
|
|
|
|
|
|
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
|
|
|
|
_namess << "(CPU)";
|
|
|
|
|
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
|
|
|
|
_namess << "(GPU)";
|
|
|
|
|
else
|
|
|
|
|
_namess << "(UNK)";
|
|
|
|
|
|
|
|
|
|
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
|
|
|
|
|
auto _desc = _track.Serialize();
|
|
|
|
|
_desc.set_name(_namess.str());
|
|
|
|
|
|
|
|
|
|
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
|
|
|
|
|
|
|
|
|
agent_thread_tracks[itr.first].emplace(titr, _track);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for(const auto& aitr : agent_queue_ids)
|
|
|
|
|
{
|
|
|
|
|
uint32_t nqueue = 0;
|
|
|
|
|
for(auto qitr : aitr.second)
|
|
|
|
|
{
|
|
|
|
|
const auto* _agent = _get_agent(aitr.first);
|
|
|
|
|
|
|
|
|
|
auto _namess = std::stringstream{};
|
2025-03-14 02:51:32 -05:00
|
|
|
auto agent_index_info =
|
|
|
|
|
tool_metadata.get_agent_index(_agent->id, ocfg.agent_index_value);
|
|
|
|
|
_namess << "COMPUTE " << agent_index_info.label << " [" << agent_index_info.index
|
|
|
|
|
<< "] QUEUE [" << nqueue++ << "] ";
|
|
|
|
|
_namess << agent_index_info.type;
|
2024-11-07 01:15:19 -06:00
|
|
|
|
|
|
|
|
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
|
|
|
|
|
auto _desc = _track.Serialize();
|
|
|
|
|
_desc.set_name(_namess.str());
|
|
|
|
|
|
|
|
|
|
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
|
|
|
|
|
|
|
|
|
agent_queue_tracks[aitr.first].emplace(qitr, _track);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-06-26 14:22:50 -05:00
|
|
|
for(const auto& sitr : agent_stream_ids)
|
2025-03-14 04:45:13 -05:00
|
|
|
{
|
2025-06-26 14:22:50 -05:00
|
|
|
const auto stream_id = sitr.handle;
|
2025-03-14 04:45:13 -05:00
|
|
|
|
2025-06-26 14:22:50 -05:00
|
|
|
{
|
|
|
|
|
auto _namess = std::stringstream{};
|
|
|
|
|
_namess << fmt::format("STREAM [\" {} \"] ", stream_id);
|
2025-03-14 04:45:13 -05:00
|
|
|
|
2025-06-26 14:22:50 -05:00
|
|
|
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
|
|
|
|
|
auto _desc = _track.Serialize();
|
|
|
|
|
_desc.set_name(_namess.str());
|
2025-03-14 04:45:13 -05:00
|
|
|
|
2025-06-26 14:22:50 -05:00
|
|
|
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
2025-03-14 04:45:13 -05:00
|
|
|
|
2025-06-26 14:22:50 -05:00
|
|
|
stream_tracks.emplace(sitr, _track);
|
2025-03-14 04:45:13 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-02-20 10:14:37 +00:00
|
|
|
auto counter_id_to_name = std::unordered_map<rocprofiler_counter_id_t, std::string_view>{};
|
2025-06-09 13:14:55 +05:30
|
|
|
for(const auto& itr : tool_metadata.get_counter_info())
|
2025-10-27 07:58:20 -07:00
|
|
|
{
|
|
|
|
|
// Counter records now contain agent-encoded IDs (reconstructed in tool.cpp),
|
|
|
|
|
// so we use the full agent-encoded ID from metadata as the map key
|
2025-06-09 13:14:55 +05:30
|
|
|
counter_id_to_name.emplace(itr.id, itr.name);
|
2025-10-27 07:58:20 -07:00
|
|
|
}
|
2025-06-09 13:14:55 +05:30
|
|
|
|
|
|
|
|
// Map: correlation_id -> map<counter_id, value>
|
|
|
|
|
auto dispatch_counter_id_value =
|
|
|
|
|
std::unordered_map<uint64_t, std::unordered_map<rocprofiler_counter_id_t, double>>{};
|
2025-02-20 10:14:37 +00:00
|
|
|
|
2024-11-07 01:15:19 -06:00
|
|
|
// trace events
|
|
|
|
|
{
|
|
|
|
|
auto buffer_names = sdk::get_buffer_tracing_names();
|
|
|
|
|
auto callbk_name_info = sdk::get_callback_tracing_names();
|
|
|
|
|
|
|
|
|
|
for(auto ditr : hsa_api_gen)
|
|
|
|
|
for(auto itr : hsa_api_gen.get(ditr))
|
|
|
|
|
{
|
|
|
|
|
auto name = buffer_names.at(itr.kind, itr.operation);
|
|
|
|
|
auto& track = thread_tracks.at(itr.thread_id);
|
|
|
|
|
|
|
|
|
|
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::hsa_api>::name,
|
|
|
|
|
::perfetto::StaticString(name.data()),
|
|
|
|
|
track,
|
|
|
|
|
itr.start_timestamp,
|
|
|
|
|
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
|
|
|
|
"begin_ns",
|
|
|
|
|
itr.start_timestamp,
|
|
|
|
|
"end_ns",
|
|
|
|
|
itr.end_timestamp,
|
|
|
|
|
"delta_ns",
|
|
|
|
|
(itr.end_timestamp - itr.start_timestamp),
|
|
|
|
|
"tid",
|
|
|
|
|
itr.thread_id,
|
|
|
|
|
"kind",
|
|
|
|
|
itr.kind,
|
|
|
|
|
"operation",
|
|
|
|
|
itr.operation,
|
|
|
|
|
"corr_id",
|
2025-03-20 22:52:48 -07:00
|
|
|
itr.correlation_id.internal,
|
|
|
|
|
"ancestor_id",
|
|
|
|
|
itr.correlation_id.ancestor);
|
|
|
|
|
|
2024-11-07 01:15:19 -06:00
|
|
|
TRACE_EVENT_END(
|
|
|
|
|
sdk::perfetto_category<sdk::category::hsa_api>::name, track, itr.end_timestamp);
|
|
|
|
|
tracing_session->FlushBlocking();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for(auto ditr : hip_api_gen)
|
|
|
|
|
for(auto itr : hip_api_gen.get(ditr))
|
|
|
|
|
{
|
|
|
|
|
auto name = buffer_names.at(itr.kind, itr.operation);
|
|
|
|
|
auto& track = thread_tracks.at(itr.thread_id);
|
|
|
|
|
|
|
|
|
|
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::hip_api>::name,
|
|
|
|
|
::perfetto::StaticString(name.data()),
|
|
|
|
|
track,
|
|
|
|
|
itr.start_timestamp,
|
|
|
|
|
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
|
|
|
|
"begin_ns",
|
|
|
|
|
itr.start_timestamp,
|
|
|
|
|
"end_ns",
|
|
|
|
|
itr.end_timestamp,
|
|
|
|
|
"delta_ns",
|
|
|
|
|
(itr.end_timestamp - itr.start_timestamp),
|
|
|
|
|
"tid",
|
|
|
|
|
itr.thread_id,
|
|
|
|
|
"kind",
|
|
|
|
|
itr.kind,
|
|
|
|
|
"operation",
|
|
|
|
|
itr.operation,
|
|
|
|
|
"corr_id",
|
2025-03-20 22:52:48 -07:00
|
|
|
itr.correlation_id.internal,
|
|
|
|
|
"ancestor_id",
|
2025-06-26 14:22:50 -05:00
|
|
|
itr.correlation_id.ancestor,
|
|
|
|
|
"stream_ID",
|
|
|
|
|
itr.stream_id.handle);
|
2025-03-20 22:52:48 -07:00
|
|
|
|
2024-11-07 01:15:19 -06:00
|
|
|
TRACE_EVENT_END(
|
|
|
|
|
sdk::perfetto_category<sdk::category::hip_api>::name, track, itr.end_timestamp);
|
|
|
|
|
tracing_session->FlushBlocking();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for(auto ditr : marker_api_gen)
|
|
|
|
|
for(auto itr : marker_api_gen.get(ditr))
|
|
|
|
|
{
|
|
|
|
|
auto& track = thread_tracks.at(itr.thread_id);
|
2025-07-08 23:41:22 -07:00
|
|
|
auto name = (itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API &&
|
|
|
|
|
itr.operation != ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId)
|
2024-11-07 01:15:19 -06:00
|
|
|
? tool_metadata.get_marker_message(itr.correlation_id.internal)
|
|
|
|
|
: buffer_names.at(itr.kind, itr.operation);
|
|
|
|
|
|
|
|
|
|
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::marker_api>::name,
|
|
|
|
|
::perfetto::StaticString(name.data()),
|
|
|
|
|
track,
|
|
|
|
|
itr.start_timestamp,
|
|
|
|
|
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
|
|
|
|
"begin_ns",
|
|
|
|
|
itr.start_timestamp,
|
|
|
|
|
"end_ns",
|
|
|
|
|
itr.end_timestamp,
|
|
|
|
|
"delta_ns",
|
|
|
|
|
(itr.end_timestamp - itr.start_timestamp),
|
|
|
|
|
"tid",
|
|
|
|
|
itr.thread_id,
|
|
|
|
|
"kind",
|
|
|
|
|
itr.kind,
|
|
|
|
|
"operation",
|
|
|
|
|
itr.operation,
|
|
|
|
|
"corr_id",
|
2025-03-20 22:52:48 -07:00
|
|
|
itr.correlation_id.internal,
|
|
|
|
|
"ancestor_id",
|
|
|
|
|
itr.correlation_id.ancestor);
|
2024-11-07 01:15:19 -06:00
|
|
|
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::marker_api>::name,
|
|
|
|
|
track,
|
|
|
|
|
itr.end_timestamp);
|
|
|
|
|
tracing_session->FlushBlocking();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for(auto ditr : rccl_api_gen)
|
|
|
|
|
for(auto itr : rccl_api_gen.get(ditr))
|
|
|
|
|
{
|
|
|
|
|
auto name = buffer_names.at(itr.kind, itr.operation);
|
|
|
|
|
auto& track = thread_tracks.at(itr.thread_id);
|
|
|
|
|
|
|
|
|
|
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::rccl_api>::name,
|
|
|
|
|
::perfetto::StaticString(name.data()),
|
|
|
|
|
track,
|
|
|
|
|
itr.start_timestamp,
|
|
|
|
|
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
|
|
|
|
"begin_ns",
|
|
|
|
|
itr.start_timestamp,
|
|
|
|
|
"end_ns",
|
|
|
|
|
itr.end_timestamp,
|
|
|
|
|
"delta_ns",
|
|
|
|
|
(itr.end_timestamp - itr.start_timestamp),
|
|
|
|
|
"tid",
|
|
|
|
|
itr.thread_id,
|
|
|
|
|
"kind",
|
|
|
|
|
itr.kind,
|
|
|
|
|
"operation",
|
|
|
|
|
itr.operation,
|
|
|
|
|
"corr_id",
|
2025-03-20 22:52:48 -07:00
|
|
|
itr.correlation_id.internal,
|
|
|
|
|
"ancestor_id",
|
|
|
|
|
itr.correlation_id.ancestor);
|
2024-11-07 01:15:19 -06:00
|
|
|
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::rccl_api>::name,
|
|
|
|
|
track,
|
2025-01-17 16:42:25 -06:00
|
|
|
itr.end_timestamp);
|
|
|
|
|
tracing_session->FlushBlocking();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for(auto ditr : rocdecode_api_gen)
|
|
|
|
|
for(auto itr : rocdecode_api_gen.get(ditr))
|
|
|
|
|
{
|
2025-04-11 16:56:36 -05:00
|
|
|
auto name = buffer_names.at(itr.kind, itr.operation);
|
|
|
|
|
auto& track = thread_tracks.at(itr.thread_id);
|
|
|
|
|
auto rocdecode_args = sdk::serialization::get_buffer_tracing_args(itr);
|
2025-01-17 16:42:25 -06:00
|
|
|
|
|
|
|
|
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::rocdecode_api>::name,
|
|
|
|
|
::perfetto::StaticString(name.data()),
|
|
|
|
|
track,
|
|
|
|
|
itr.start_timestamp,
|
|
|
|
|
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
|
|
|
|
"begin_ns",
|
|
|
|
|
itr.start_timestamp,
|
|
|
|
|
"end_ns",
|
|
|
|
|
itr.end_timestamp,
|
|
|
|
|
"delta_ns",
|
|
|
|
|
(itr.end_timestamp - itr.start_timestamp),
|
|
|
|
|
"tid",
|
|
|
|
|
itr.thread_id,
|
|
|
|
|
"kind",
|
|
|
|
|
itr.kind,
|
|
|
|
|
"operation",
|
|
|
|
|
itr.operation,
|
|
|
|
|
"corr_id",
|
2025-03-20 22:52:48 -07:00
|
|
|
itr.correlation_id.internal,
|
|
|
|
|
"ancestor_id",
|
2025-04-11 16:56:36 -05:00
|
|
|
itr.correlation_id.ancestor,
|
|
|
|
|
[&](::perfetto::EventContext ctx) {
|
|
|
|
|
for(const auto& rocdecode_arg : rocdecode_args)
|
|
|
|
|
{
|
|
|
|
|
sdk::add_perfetto_annotation(
|
|
|
|
|
ctx, rocdecode_arg.name, rocdecode_arg.value);
|
|
|
|
|
}
|
|
|
|
|
});
|
2025-01-17 16:42:25 -06:00
|
|
|
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::rocdecode_api>::name,
|
|
|
|
|
track,
|
2024-11-07 01:15:19 -06:00
|
|
|
itr.end_timestamp);
|
|
|
|
|
tracing_session->FlushBlocking();
|
|
|
|
|
}
|
|
|
|
|
|
2025-02-21 15:43:49 -06:00
|
|
|
for(auto ditr : rocjpeg_api_gen)
|
|
|
|
|
for(auto itr : rocjpeg_api_gen.get(ditr))
|
|
|
|
|
{
|
|
|
|
|
auto name = buffer_names.at(itr.kind, itr.operation);
|
|
|
|
|
auto& track = thread_tracks.at(itr.thread_id);
|
|
|
|
|
|
|
|
|
|
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::rocjpeg_api>::name,
|
|
|
|
|
::perfetto::StaticString(name.data()),
|
|
|
|
|
track,
|
|
|
|
|
itr.start_timestamp,
|
|
|
|
|
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
|
|
|
|
"begin_ns",
|
|
|
|
|
itr.start_timestamp,
|
|
|
|
|
"end_ns",
|
|
|
|
|
itr.end_timestamp,
|
|
|
|
|
"delta_ns",
|
|
|
|
|
(itr.end_timestamp - itr.start_timestamp),
|
|
|
|
|
"tid",
|
|
|
|
|
itr.thread_id,
|
|
|
|
|
"kind",
|
|
|
|
|
itr.kind,
|
|
|
|
|
"operation",
|
|
|
|
|
itr.operation,
|
|
|
|
|
"corr_id",
|
2025-03-20 22:52:48 -07:00
|
|
|
itr.correlation_id.internal,
|
|
|
|
|
"ancestor_id",
|
|
|
|
|
itr.correlation_id.ancestor);
|
2025-02-21 15:43:49 -06:00
|
|
|
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::rocjpeg_api>::name,
|
|
|
|
|
track,
|
|
|
|
|
itr.end_timestamp);
|
|
|
|
|
tracing_session->FlushBlocking();
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-07 01:15:19 -06:00
|
|
|
for(auto ditr : memory_copy_gen)
|
|
|
|
|
for(auto itr : memory_copy_gen.get(ditr))
|
|
|
|
|
{
|
2025-03-14 04:45:13 -05:00
|
|
|
auto name = buffer_names.at(itr.kind, itr.operation);
|
|
|
|
|
|
|
|
|
|
::perfetto::Track* _track = nullptr;
|
2025-05-01 00:56:15 -05:00
|
|
|
if(group_by_queue)
|
2025-03-14 04:45:13 -05:00
|
|
|
{
|
|
|
|
|
_track = &agent_thread_tracks.at(itr.dst_agent_id).at(itr.thread_id);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2025-05-01 00:56:15 -05:00
|
|
|
_track = &stream_tracks.at(itr.stream_id);
|
2025-03-14 04:45:13 -05:00
|
|
|
}
|
2024-11-07 01:15:19 -06:00
|
|
|
|
2025-03-14 02:51:32 -05:00
|
|
|
TRACE_EVENT_BEGIN(
|
|
|
|
|
sdk::perfetto_category<sdk::category::memory_copy>::name,
|
|
|
|
|
::perfetto::StaticString(name.data()),
|
2025-03-14 04:45:13 -05:00
|
|
|
*_track,
|
2025-03-14 02:51:32 -05:00
|
|
|
itr.start_timestamp,
|
|
|
|
|
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
|
|
|
|
"begin_ns",
|
|
|
|
|
itr.start_timestamp,
|
|
|
|
|
"end_ns",
|
|
|
|
|
itr.end_timestamp,
|
|
|
|
|
"delta_ns",
|
|
|
|
|
(itr.end_timestamp - itr.start_timestamp),
|
|
|
|
|
"kind",
|
|
|
|
|
itr.kind,
|
|
|
|
|
"operation",
|
|
|
|
|
itr.operation,
|
|
|
|
|
"src_agent",
|
|
|
|
|
tool_metadata.get_agent_index(itr.src_agent_id, ocfg.agent_index_value)
|
|
|
|
|
.as_string("-"),
|
|
|
|
|
"dst_agent",
|
|
|
|
|
tool_metadata.get_agent_index(itr.dst_agent_id, ocfg.agent_index_value)
|
|
|
|
|
.as_string("-"),
|
|
|
|
|
"copy_bytes",
|
|
|
|
|
itr.bytes,
|
|
|
|
|
"corr_id",
|
|
|
|
|
itr.correlation_id.internal,
|
|
|
|
|
"tid",
|
2025-06-26 14:22:50 -05:00
|
|
|
itr.thread_id,
|
|
|
|
|
"stream_ID",
|
|
|
|
|
itr.stream_id.handle);
|
2024-11-07 01:15:19 -06:00
|
|
|
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::memory_copy>::name,
|
2025-03-14 04:45:13 -05:00
|
|
|
*_track,
|
2024-11-07 01:15:19 -06:00
|
|
|
itr.end_timestamp);
|
2025-03-14 04:45:13 -05:00
|
|
|
|
2024-11-07 01:15:19 -06:00
|
|
|
tracing_session->FlushBlocking();
|
|
|
|
|
}
|
2025-02-20 10:14:37 +00:00
|
|
|
|
|
|
|
|
for(auto ditr : counter_collection_gen)
|
|
|
|
|
for(const auto& record : counter_collection_gen.get(ditr))
|
|
|
|
|
{
|
2025-06-09 13:14:55 +05:30
|
|
|
auto& counter_id_value =
|
|
|
|
|
dispatch_counter_id_value[record.dispatch_data.correlation_id.internal];
|
2025-02-20 10:14:37 +00:00
|
|
|
auto record_vector = record.read();
|
|
|
|
|
|
2025-06-09 13:14:55 +05:30
|
|
|
// Accumulate counters based on ID for this dispatch
|
2025-02-20 10:14:37 +00:00
|
|
|
for(auto& count : record_vector)
|
|
|
|
|
{
|
|
|
|
|
counter_id_value[count.id] += count.value;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-07 01:15:19 -06:00
|
|
|
for(auto ditr : kernel_dispatch_gen)
|
2025-03-13 22:40:03 -05:00
|
|
|
{
|
|
|
|
|
auto generator = kernel_dispatch_gen.get(ditr);
|
|
|
|
|
// Group kernels on the same queue and agent. Temporary fix for firmware timestamp bug
|
|
|
|
|
// Can be removed once bug is resolved.
|
|
|
|
|
auto dispatch_bins = std::unordered_map<
|
|
|
|
|
rocprofiler_agent_id_t,
|
|
|
|
|
std::unordered_map<
|
|
|
|
|
rocprofiler_queue_id_t,
|
2025-05-01 00:56:15 -05:00
|
|
|
std::vector<tool_buffer_tracing_kernel_dispatch_ext_record_t*>>>{};
|
2025-03-13 22:40:03 -05:00
|
|
|
for(auto& itr : generator)
|
2024-11-07 01:15:19 -06:00
|
|
|
{
|
2025-03-13 22:40:03 -05:00
|
|
|
const auto& info = itr.dispatch_info;
|
|
|
|
|
dispatch_bins[info.agent_id][info.queue_id].emplace_back(&itr);
|
|
|
|
|
}
|
2024-11-07 01:15:19 -06:00
|
|
|
|
2025-03-13 22:40:03 -05:00
|
|
|
for(const auto& aitr : dispatch_bins)
|
|
|
|
|
{
|
|
|
|
|
for(auto qitr : aitr.second)
|
2024-11-07 01:15:19 -06:00
|
|
|
{
|
2025-03-13 22:40:03 -05:00
|
|
|
// Sort kernels on the same queue and agent by timestamp
|
|
|
|
|
std::sort(qitr.second.begin(),
|
|
|
|
|
qitr.second.end(),
|
|
|
|
|
[](const auto* lhs, const auto* rhs) {
|
|
|
|
|
return lhs->start_timestamp < rhs->start_timestamp;
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// Loop over the kernels (qitr.second) and put them into perfetto.
|
|
|
|
|
for(auto it = qitr.second.begin(); it != qitr.second.end(); ++it)
|
|
|
|
|
{
|
|
|
|
|
auto& current = **it;
|
|
|
|
|
const auto& info = current.dispatch_info;
|
|
|
|
|
const kernel_symbol_info* sym =
|
|
|
|
|
tool_metadata.get_kernel_symbol(info.kernel_id);
|
|
|
|
|
|
|
|
|
|
CHECK(sym != nullptr);
|
|
|
|
|
|
2025-03-14 04:45:13 -05:00
|
|
|
auto name = std::string_view{sym->kernel_name};
|
|
|
|
|
|
2025-06-26 14:22:50 -05:00
|
|
|
::perfetto::Track* _track = nullptr;
|
|
|
|
|
auto stream_id = (*it)->stream_id;
|
2025-05-01 00:56:15 -05:00
|
|
|
if(group_by_queue)
|
2025-03-14 04:45:13 -05:00
|
|
|
{
|
|
|
|
|
_track = &agent_queue_tracks.at(info.agent_id).at(info.queue_id);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2025-06-26 14:22:50 -05:00
|
|
|
_track = &stream_tracks.at(stream_id);
|
2025-03-14 04:45:13 -05:00
|
|
|
}
|
2025-03-13 22:40:03 -05:00
|
|
|
|
|
|
|
|
// Temporary fix until timestamp issues are resolved: Set timestamps to be
|
|
|
|
|
// halfway between ending timestamp and starting timestamp of overlapping
|
|
|
|
|
// kernel dispatches. Perfetto displays slices incorrectly if overlapping
|
|
|
|
|
// slices on the same track are not completely enveloped.
|
|
|
|
|
auto next = std::next(it);
|
|
|
|
|
if(next != qitr.second.end() &&
|
|
|
|
|
(*next)->start_timestamp < (*it)->end_timestamp)
|
|
|
|
|
{
|
|
|
|
|
auto start = (*next)->start_timestamp;
|
|
|
|
|
auto end = std::min((*it)->end_timestamp, (*next)->end_timestamp);
|
|
|
|
|
auto mid = start + (end - start) / 2;
|
|
|
|
|
// Report changed timestamps to ROCP INFO
|
|
|
|
|
ROCP_INFO << fmt::format(
|
|
|
|
|
"Kernel ending timestamp increased by {} ns to {} ns with "
|
|
|
|
|
"following kernel starting timestamp decreased by {} ns to {} ns "
|
|
|
|
|
"due to firmware timestamp error.",
|
|
|
|
|
((*it)->end_timestamp - mid),
|
|
|
|
|
mid,
|
|
|
|
|
(mid - (*next)->start_timestamp),
|
|
|
|
|
mid);
|
|
|
|
|
(*it)->end_timestamp = mid;
|
|
|
|
|
(*next)->start_timestamp = mid;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if(demangled.find(name) == demangled.end())
|
|
|
|
|
{
|
|
|
|
|
demangled.emplace(name, common::cxx_demangle(name));
|
|
|
|
|
}
|
2025-06-26 14:22:50 -05:00
|
|
|
// Queue IDs are 1 higher than the track name. Subtracting 1 for consistency
|
|
|
|
|
auto queue_id = info.queue_id.handle > 0 ? info.queue_id.handle - 1 : 0;
|
2025-03-13 22:40:03 -05:00
|
|
|
|
|
|
|
|
TRACE_EVENT_BEGIN(
|
|
|
|
|
sdk::perfetto_category<sdk::category::kernel_dispatch>::name,
|
|
|
|
|
::perfetto::StaticString(demangled.at(name).c_str()),
|
2025-03-14 04:45:13 -05:00
|
|
|
*_track,
|
2025-03-13 22:40:03 -05:00
|
|
|
current.start_timestamp,
|
|
|
|
|
::perfetto::Flow::ProcessScoped(current.correlation_id.internal),
|
|
|
|
|
"begin_ns",
|
|
|
|
|
current.start_timestamp,
|
|
|
|
|
"end_ns",
|
|
|
|
|
current.end_timestamp,
|
|
|
|
|
"delta_ns",
|
|
|
|
|
(current.end_timestamp - current.start_timestamp),
|
|
|
|
|
"kind",
|
|
|
|
|
current.kind,
|
|
|
|
|
"agent",
|
2025-03-14 02:51:32 -05:00
|
|
|
tool_metadata
|
|
|
|
|
.get_agent_index(agents_map.at(info.agent_id).id,
|
|
|
|
|
ocfg.agent_index_value)
|
|
|
|
|
.as_string("-"),
|
2025-05-01 00:56:15 -05:00
|
|
|
"agent_type",
|
|
|
|
|
tool_metadata
|
|
|
|
|
.get_agent_index(agents_map.at(info.agent_id).id,
|
|
|
|
|
ocfg.agent_index_value)
|
|
|
|
|
.type,
|
2025-03-13 22:40:03 -05:00
|
|
|
"corr_id",
|
|
|
|
|
current.correlation_id.internal,
|
|
|
|
|
"queue",
|
2025-06-26 14:22:50 -05:00
|
|
|
queue_id,
|
2025-03-13 22:40:03 -05:00
|
|
|
"tid",
|
|
|
|
|
current.thread_id,
|
|
|
|
|
"kernel_id",
|
|
|
|
|
info.kernel_id,
|
2025-06-26 14:22:50 -05:00
|
|
|
"Scratch_Size",
|
2025-03-13 22:40:03 -05:00
|
|
|
info.private_segment_size,
|
2025-06-26 14:22:50 -05:00
|
|
|
"LDS_Block_Size",
|
2025-03-13 22:40:03 -05:00
|
|
|
info.group_segment_size,
|
2025-06-26 14:22:50 -05:00
|
|
|
"VGPR_Count",
|
|
|
|
|
sym->arch_vgpr_count,
|
|
|
|
|
"Accum_VGPR_Count",
|
|
|
|
|
sym->accum_vgpr_count,
|
|
|
|
|
"SGPR_Count",
|
|
|
|
|
sym->sgpr_count,
|
2025-03-13 22:40:03 -05:00
|
|
|
"workgroup_size",
|
|
|
|
|
info.workgroup_size.x * info.workgroup_size.y * info.workgroup_size.z,
|
|
|
|
|
"grid_size",
|
2025-02-20 10:14:37 +00:00
|
|
|
info.grid_size.x * info.grid_size.y * info.grid_size.z,
|
2025-06-26 14:22:50 -05:00
|
|
|
"stream_ID",
|
|
|
|
|
stream_id.handle,
|
2025-02-20 10:14:37 +00:00
|
|
|
[&](::perfetto::EventContext ctx) {
|
2025-06-09 13:14:55 +05:30
|
|
|
auto corr_id = current.correlation_id.internal;
|
|
|
|
|
auto counter_it = dispatch_counter_id_value.find(corr_id);
|
|
|
|
|
if(counter_it != dispatch_counter_id_value.end())
|
2025-02-20 10:14:37 +00:00
|
|
|
{
|
2025-06-09 13:14:55 +05:30
|
|
|
for(auto& [counter_id, counter_value] : counter_it->second)
|
|
|
|
|
{
|
|
|
|
|
auto name_it = counter_id_to_name.find(counter_id);
|
|
|
|
|
if(name_it != counter_id_to_name.end())
|
|
|
|
|
{
|
|
|
|
|
sdk::add_perfetto_annotation(
|
|
|
|
|
ctx, name_it->second, counter_value);
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-02-20 10:14:37 +00:00
|
|
|
}
|
|
|
|
|
});
|
2025-03-13 22:40:03 -05:00
|
|
|
TRACE_EVENT_END(
|
|
|
|
|
sdk::perfetto_category<sdk::category::kernel_dispatch>::name,
|
2025-03-14 04:45:13 -05:00
|
|
|
*_track,
|
2025-03-13 22:40:03 -05:00
|
|
|
current.end_timestamp);
|
|
|
|
|
tracing_session->FlushBlocking();
|
|
|
|
|
}
|
2024-11-07 01:15:19 -06:00
|
|
|
}
|
|
|
|
|
}
|
2025-03-13 22:40:03 -05:00
|
|
|
}
|
2024-11-07 01:15:19 -06:00
|
|
|
}
|
2025-02-20 10:14:37 +00:00
|
|
|
|
2024-11-07 01:15:19 -06:00
|
|
|
// counter tracks
|
|
|
|
|
{
|
|
|
|
|
// memory copy counter track
|
2025-02-12 12:59:53 -06:00
|
|
|
auto mem_cpy_endpoints =
|
|
|
|
|
std::map<rocprofiler_agent_id_t, std::map<rocprofiler_timestamp_t, uint64_t>>{};
|
|
|
|
|
auto mem_cpy_extremes = std::pair<uint64_t, uint64_t>{std::numeric_limits<uint64_t>::max(),
|
2025-02-06 20:02:37 +05:30
|
|
|
std::numeric_limits<uint64_t>::min()};
|
2025-02-12 12:59:53 -06:00
|
|
|
auto constexpr timestamp_buffer = 1000;
|
2024-11-07 01:15:19 -06:00
|
|
|
for(auto ditr : memory_copy_gen)
|
|
|
|
|
for(auto itr : memory_copy_gen.get(ditr))
|
|
|
|
|
{
|
|
|
|
|
uint64_t _mean_timestamp =
|
|
|
|
|
itr.start_timestamp + (0.5 * (itr.end_timestamp - itr.start_timestamp));
|
|
|
|
|
|
2025-02-12 12:59:53 -06:00
|
|
|
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp - timestamp_buffer,
|
|
|
|
|
0);
|
2024-11-07 01:15:19 -06:00
|
|
|
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.start_timestamp, 0);
|
|
|
|
|
mem_cpy_endpoints[itr.dst_agent_id].emplace(_mean_timestamp, 0);
|
|
|
|
|
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp, 0);
|
2025-02-12 12:59:53 -06:00
|
|
|
mem_cpy_endpoints[itr.dst_agent_id].emplace(itr.end_timestamp + timestamp_buffer,
|
|
|
|
|
0);
|
2024-11-07 01:15:19 -06:00
|
|
|
|
|
|
|
|
mem_cpy_extremes =
|
|
|
|
|
std::make_pair(std::min(mem_cpy_extremes.first, itr.start_timestamp),
|
|
|
|
|
std::max(mem_cpy_extremes.second, itr.end_timestamp));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for(auto ditr : memory_copy_gen)
|
|
|
|
|
for(auto itr : memory_copy_gen.get(ditr))
|
|
|
|
|
{
|
|
|
|
|
auto mbeg = mem_cpy_endpoints.at(itr.dst_agent_id).lower_bound(itr.start_timestamp);
|
|
|
|
|
auto mend = mem_cpy_endpoints.at(itr.dst_agent_id).upper_bound(itr.end_timestamp);
|
|
|
|
|
|
|
|
|
|
LOG_IF(FATAL, mbeg == mend)
|
|
|
|
|
<< "Missing range for timestamp [" << itr.start_timestamp << ", "
|
|
|
|
|
<< itr.end_timestamp << "]";
|
|
|
|
|
|
|
|
|
|
for(auto mitr = mbeg; mitr != mend; ++mitr)
|
|
|
|
|
mitr->second += itr.bytes;
|
|
|
|
|
}
|
|
|
|
|
|
2025-02-12 12:59:53 -06:00
|
|
|
constexpr auto bytes_multiplier = 1024;
|
|
|
|
|
constexpr auto extremes_endpoint_buffer = 5000;
|
2024-11-07 01:15:19 -06:00
|
|
|
|
|
|
|
|
auto mem_cpy_tracks =
|
|
|
|
|
std::unordered_map<rocprofiler_agent_id_t, ::perfetto::CounterTrack>{};
|
|
|
|
|
auto mem_cpy_cnt_names = std::vector<std::string>{};
|
|
|
|
|
mem_cpy_cnt_names.reserve(mem_cpy_endpoints.size());
|
|
|
|
|
for(auto& mitr : mem_cpy_endpoints)
|
|
|
|
|
{
|
2025-02-12 12:59:53 -06:00
|
|
|
mem_cpy_endpoints[mitr.first].emplace(mem_cpy_extremes.first - extremes_endpoint_buffer,
|
|
|
|
|
0);
|
|
|
|
|
mem_cpy_endpoints[mitr.first].emplace(
|
|
|
|
|
mem_cpy_extremes.second + extremes_endpoint_buffer, 0);
|
2024-11-07 01:15:19 -06:00
|
|
|
|
|
|
|
|
auto _track_name = std::stringstream{};
|
|
|
|
|
const auto* _agent = _get_agent(mitr.first);
|
2025-03-14 02:51:32 -05:00
|
|
|
auto agent_index_info =
|
|
|
|
|
tool_metadata.get_agent_index(_agent->id, ocfg.agent_index_value);
|
|
|
|
|
_track_name << "COPY BYTES to " << agent_index_info.label << " ["
|
|
|
|
|
<< agent_index_info.index << "] (" << agent_index_info.type << ")";
|
2024-11-07 01:15:19 -06:00
|
|
|
|
|
|
|
|
constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES;
|
|
|
|
|
auto& _name = mem_cpy_cnt_names.emplace_back(_track_name.str());
|
|
|
|
|
mem_cpy_tracks.emplace(mitr.first,
|
|
|
|
|
::perfetto::CounterTrack{_name.c_str()}
|
|
|
|
|
.set_unit(_unit)
|
|
|
|
|
.set_unit_multiplier(bytes_multiplier)
|
|
|
|
|
.set_is_incremental(false));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for(auto& mitr : mem_cpy_endpoints)
|
|
|
|
|
{
|
|
|
|
|
for(auto itr : mitr.second)
|
|
|
|
|
{
|
|
|
|
|
TRACE_COUNTER(sdk::perfetto_category<sdk::category::memory_copy>::name,
|
|
|
|
|
mem_cpy_tracks.at(mitr.first),
|
|
|
|
|
itr.first,
|
|
|
|
|
itr.second / bytes_multiplier);
|
|
|
|
|
tracing_session->FlushBlocking();
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-12-06 00:05:30 -06:00
|
|
|
|
|
|
|
|
// memory allocation counter track
|
2025-02-12 12:59:53 -06:00
|
|
|
struct free_memory_information
|
|
|
|
|
{
|
|
|
|
|
rocprofiler_timestamp_t start_timestamp = 0;
|
|
|
|
|
rocprofiler_timestamp_t end_timestamp = 0;
|
|
|
|
|
rocprofiler_address_t address = {.handle = 0};
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct memory_information
|
|
|
|
|
{
|
|
|
|
|
uint64_t alloc_size = {0};
|
|
|
|
|
rocprofiler_address_t address = {.handle = 0};
|
|
|
|
|
bool is_alloc_op = {false};
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct agent_and_size
|
|
|
|
|
{
|
2025-07-18 12:05:52 -05:00
|
|
|
rocprofiler_agent_id_t agent_id = rocprofiler_agent_id_t{.handle = 0};
|
|
|
|
|
uint64_t size = {0};
|
2025-02-12 12:59:53 -06:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
auto mem_alloc_endpoints =
|
|
|
|
|
std::unordered_map<rocprofiler_agent_id_t,
|
|
|
|
|
std::map<rocprofiler_timestamp_t, memory_information>>{};
|
|
|
|
|
auto mem_alloc_extremes = std::pair<uint64_t, uint64_t>{
|
2025-02-06 20:02:37 +05:30
|
|
|
std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::min()};
|
2025-02-12 12:59:53 -06:00
|
|
|
auto address_to_agent_and_size =
|
|
|
|
|
std::unordered_map<rocprofiler_address_t, agent_and_size>{};
|
|
|
|
|
auto free_mem_info = std::vector<free_memory_information>{};
|
|
|
|
|
|
|
|
|
|
// Load memory allocation endpoints
|
2024-12-06 00:05:30 -06:00
|
|
|
for(auto ditr : memory_allocation_gen)
|
|
|
|
|
for(auto itr : memory_allocation_gen.get(ditr))
|
|
|
|
|
{
|
|
|
|
|
if(itr.operation == ROCPROFILER_MEMORY_ALLOCATION_ALLOCATE ||
|
|
|
|
|
itr.operation == ROCPROFILER_MEMORY_ALLOCATION_VMEM_ALLOCATE)
|
|
|
|
|
{
|
2025-10-27 07:58:20 -07:00
|
|
|
LOG_IF(FATAL, itr.agent_id == sdk::null_agent_id)
|
2025-02-12 12:59:53 -06:00
|
|
|
<< "Missing agent id for memory allocation trace";
|
|
|
|
|
mem_alloc_endpoints[itr.agent_id].emplace(
|
|
|
|
|
itr.start_timestamp,
|
|
|
|
|
memory_information{itr.allocation_size, itr.address, true});
|
|
|
|
|
mem_alloc_endpoints[itr.agent_id].emplace(
|
|
|
|
|
itr.end_timestamp,
|
|
|
|
|
memory_information{itr.allocation_size, itr.address, true});
|
|
|
|
|
address_to_agent_and_size.emplace(
|
|
|
|
|
itr.address, agent_and_size{itr.agent_id, itr.allocation_size});
|
|
|
|
|
}
|
|
|
|
|
else if(itr.operation == ROCPROFILER_MEMORY_ALLOCATION_FREE ||
|
|
|
|
|
itr.operation == ROCPROFILER_MEMORY_ALLOCATION_VMEM_FREE)
|
|
|
|
|
{
|
|
|
|
|
// Store free memory operations in seperate vector to pair with agent
|
|
|
|
|
// and allocation size in following loop
|
|
|
|
|
free_mem_info.push_back(free_memory_information{
|
|
|
|
|
itr.start_timestamp, itr.end_timestamp, itr.address});
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
ROCP_CI_LOG(WARNING) << "unhandled memory allocation type " << itr.operation;
|
2024-12-06 00:05:30 -06:00
|
|
|
}
|
|
|
|
|
}
|
2025-02-12 12:59:53 -06:00
|
|
|
// Add free memory operations to the endpoint map
|
|
|
|
|
for(const auto& itr : free_mem_info)
|
|
|
|
|
{
|
|
|
|
|
if(address_to_agent_and_size.count(itr.address) == 0)
|
2024-12-06 00:05:30 -06:00
|
|
|
{
|
2025-02-12 12:59:53 -06:00
|
|
|
if(itr.address.handle == 0)
|
|
|
|
|
{
|
|
|
|
|
// Freeing null pointers is expected behavior and is occurs in HSA functions
|
|
|
|
|
// like hipStreamDestroy
|
|
|
|
|
ROCP_INFO << "null pointer freed due to HSA operation";
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// Following should not occur
|
|
|
|
|
ROCP_INFO << "Unpaired free operation occurred";
|
|
|
|
|
}
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
auto [agent_id, allocation_size] = address_to_agent_and_size[itr.address];
|
|
|
|
|
mem_alloc_endpoints[agent_id].emplace(
|
|
|
|
|
itr.start_timestamp, memory_information{allocation_size, itr.address, false});
|
|
|
|
|
mem_alloc_endpoints[agent_id].emplace(
|
|
|
|
|
itr.end_timestamp, memory_information{allocation_size, itr.address, false});
|
|
|
|
|
}
|
|
|
|
|
// Create running sum of allocated memory
|
|
|
|
|
for(auto& [_, endpoint_map] : mem_alloc_endpoints)
|
|
|
|
|
{
|
|
|
|
|
if(!endpoint_map.empty())
|
|
|
|
|
{
|
|
|
|
|
auto earliest_agent_timestamp = endpoint_map.begin()->first;
|
|
|
|
|
auto latest_agent_timestamp = (--endpoint_map.end())->first;
|
|
|
|
|
mem_alloc_extremes =
|
|
|
|
|
std::make_pair(std::min(mem_alloc_extremes.first, earliest_agent_timestamp),
|
|
|
|
|
std::max(mem_alloc_extremes.second, latest_agent_timestamp));
|
|
|
|
|
}
|
|
|
|
|
if(endpoint_map.size() <= 1)
|
|
|
|
|
{
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2024-12-06 00:05:30 -06:00
|
|
|
|
2025-02-12 12:59:53 -06:00
|
|
|
auto prev = endpoint_map.begin();
|
|
|
|
|
auto itr = std::next(prev);
|
|
|
|
|
for(; itr != endpoint_map.end(); ++itr, ++prev)
|
|
|
|
|
{
|
|
|
|
|
// If address or allocation type are different, add or subtract from running sum
|
|
|
|
|
if(prev->second.address != itr->second.address ||
|
|
|
|
|
prev->second.is_alloc_op != itr->second.is_alloc_op)
|
2024-12-06 00:05:30 -06:00
|
|
|
{
|
2025-02-12 12:59:53 -06:00
|
|
|
if(itr->second.is_alloc_op)
|
2024-12-06 00:05:30 -06:00
|
|
|
{
|
2025-02-12 12:59:53 -06:00
|
|
|
itr->second.alloc_size += prev->second.alloc_size;
|
2024-12-06 00:05:30 -06:00
|
|
|
}
|
2025-02-12 12:59:53 -06:00
|
|
|
else if(prev->second.alloc_size >= itr->second.alloc_size)
|
|
|
|
|
{
|
|
|
|
|
itr->second.alloc_size = prev->second.alloc_size - itr->second.alloc_size;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
itr->second.alloc_size = prev->second.alloc_size;
|
2024-12-06 00:05:30 -06:00
|
|
|
}
|
|
|
|
|
}
|
2025-02-12 12:59:53 -06:00
|
|
|
}
|
2024-12-06 00:05:30 -06:00
|
|
|
|
|
|
|
|
auto mem_alloc_tracks =
|
|
|
|
|
std::unordered_map<rocprofiler_agent_id_t, ::perfetto::CounterTrack>{};
|
2025-02-12 12:59:53 -06:00
|
|
|
auto mem_alloc_cnt_names = std::vector<std::string>{};
|
2024-12-06 00:05:30 -06:00
|
|
|
mem_alloc_cnt_names.reserve(mem_alloc_endpoints.size());
|
|
|
|
|
for(auto& alloc_itr : mem_alloc_endpoints)
|
|
|
|
|
{
|
2025-02-12 12:59:53 -06:00
|
|
|
mem_alloc_endpoints[alloc_itr.first].emplace(
|
|
|
|
|
mem_alloc_extremes.first - extremes_endpoint_buffer,
|
|
|
|
|
memory_information{0, {0}, false});
|
|
|
|
|
mem_alloc_endpoints[alloc_itr.first].emplace(
|
|
|
|
|
mem_alloc_extremes.second + extremes_endpoint_buffer,
|
|
|
|
|
memory_information{0, {0}, false});
|
2024-12-06 00:05:30 -06:00
|
|
|
|
|
|
|
|
auto _track_name = std::stringstream{};
|
2025-02-12 12:59:53 -06:00
|
|
|
const rocprofiler_agent_t* _agent = _get_agent(alloc_itr.first);
|
2024-12-06 00:05:30 -06:00
|
|
|
|
2025-03-14 02:51:32 -05:00
|
|
|
if(_agent != nullptr)
|
|
|
|
|
{
|
|
|
|
|
auto agent_index_info =
|
|
|
|
|
tool_metadata.get_agent_index(_agent->id, ocfg.agent_index_value);
|
|
|
|
|
_track_name << "ALLOCATE BYTES on " << agent_index_info.label << " ["
|
|
|
|
|
<< agent_index_info.index << "] (" << agent_index_info.type << ")";
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
_track_name << "FREE BYTES";
|
2024-12-06 00:05:30 -06:00
|
|
|
|
|
|
|
|
constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES;
|
|
|
|
|
auto& _name = mem_alloc_cnt_names.emplace_back(_track_name.str());
|
|
|
|
|
mem_alloc_tracks.emplace(alloc_itr.first,
|
|
|
|
|
::perfetto::CounterTrack{_name.c_str()}
|
|
|
|
|
.set_unit(_unit)
|
|
|
|
|
.set_unit_multiplier(bytes_multiplier)
|
|
|
|
|
.set_is_incremental(false));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for(auto& alloc_itr : mem_alloc_endpoints)
|
|
|
|
|
{
|
|
|
|
|
for(auto itr : alloc_itr.second)
|
|
|
|
|
{
|
|
|
|
|
TRACE_COUNTER(sdk::perfetto_category<sdk::category::memory_allocation>::name,
|
|
|
|
|
mem_alloc_tracks.at(alloc_itr.first),
|
|
|
|
|
itr.first,
|
2025-02-12 12:59:53 -06:00
|
|
|
itr.second.alloc_size / bytes_multiplier);
|
2024-12-06 00:05:30 -06:00
|
|
|
tracing_session->FlushBlocking();
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-07-09 21:05:45 +05:30
|
|
|
|
|
|
|
|
// scratch memory counter track
|
|
|
|
|
auto scratch_mem_endpoints =
|
|
|
|
|
std::unordered_map<rocprofiler_agent_id_t,
|
|
|
|
|
std::map<rocprofiler_timestamp_t, uint64_t>>{};
|
|
|
|
|
auto scratch_mem_extremes = std::pair<uint64_t, uint64_t>{
|
|
|
|
|
std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::min()};
|
|
|
|
|
|
|
|
|
|
// Load scratch memory usage endpoints
|
|
|
|
|
for(auto ditr : scratch_memory_gen)
|
|
|
|
|
for(auto itr : scratch_memory_gen.get(ditr))
|
|
|
|
|
{
|
|
|
|
|
// Track start and end timestamps for this scratch memory record
|
|
|
|
|
scratch_mem_endpoints[itr.agent_id].emplace(itr.start_timestamp, 0);
|
|
|
|
|
scratch_mem_endpoints[itr.agent_id].emplace(itr.end_timestamp, 0);
|
|
|
|
|
|
|
|
|
|
// Update overall time range
|
|
|
|
|
scratch_mem_extremes =
|
|
|
|
|
std::make_pair(std::min(scratch_mem_extremes.first, itr.start_timestamp),
|
|
|
|
|
std::max(scratch_mem_extremes.second, itr.end_timestamp));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Load values at each endpoint
|
|
|
|
|
for(auto ditr : scratch_memory_gen)
|
|
|
|
|
for(auto itr : scratch_memory_gen.get(ditr))
|
|
|
|
|
{
|
|
|
|
|
// For each timestamp in the range of this record
|
|
|
|
|
auto begin =
|
|
|
|
|
scratch_mem_endpoints.at(itr.agent_id).lower_bound(itr.start_timestamp);
|
|
|
|
|
auto end = scratch_mem_endpoints.at(itr.agent_id).upper_bound(itr.end_timestamp);
|
|
|
|
|
|
|
|
|
|
for(auto mitr = begin; mitr != end; ++mitr)
|
|
|
|
|
{
|
|
|
|
|
// Add scratch memory size to the counter value at this timestamp
|
|
|
|
|
if(itr.operation == ROCPROFILER_SCRATCH_MEMORY_ALLOC)
|
|
|
|
|
mitr->second = itr.allocation_size;
|
|
|
|
|
else if(itr.operation == ROCPROFILER_SCRATCH_MEMORY_FREE)
|
|
|
|
|
mitr->second = 0; // For all free events current allocation drops to 0.
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Create counter tracks for visualization
|
|
|
|
|
auto scratch_mem_tracks =
|
|
|
|
|
std::unordered_map<rocprofiler_agent_id_t, ::perfetto::CounterTrack>{};
|
|
|
|
|
auto scratch_mem_names = std::vector<std::string>{};
|
|
|
|
|
scratch_mem_names.reserve(scratch_mem_endpoints.size());
|
|
|
|
|
|
|
|
|
|
for(auto& mitr : scratch_mem_endpoints)
|
|
|
|
|
{
|
|
|
|
|
// Add buffer timestamps for better visualization
|
|
|
|
|
if(!mitr.second.empty())
|
|
|
|
|
{
|
|
|
|
|
scratch_mem_endpoints[mitr.first].emplace(
|
|
|
|
|
scratch_mem_extremes.first - extremes_endpoint_buffer, 0);
|
|
|
|
|
scratch_mem_endpoints[mitr.first].emplace(
|
|
|
|
|
scratch_mem_extremes.second + extremes_endpoint_buffer, 0);
|
|
|
|
|
|
|
|
|
|
auto _track_name = std::stringstream{};
|
|
|
|
|
const auto* _agent = _get_agent(mitr.first);
|
|
|
|
|
auto agent_index_info =
|
|
|
|
|
tool_metadata.get_agent_index(_agent->id, ocfg.agent_index_value);
|
|
|
|
|
_track_name << "SCRATCH MEMORY on " << agent_index_info.label << " ["
|
|
|
|
|
<< agent_index_info.index << "] (" << agent_index_info.type << ")";
|
|
|
|
|
|
|
|
|
|
constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES;
|
|
|
|
|
auto& _name = scratch_mem_names.emplace_back(_track_name.str());
|
|
|
|
|
scratch_mem_tracks.emplace(mitr.first,
|
|
|
|
|
::perfetto::CounterTrack{_name.c_str()}
|
|
|
|
|
.set_unit(_unit)
|
|
|
|
|
.set_unit_multiplier(bytes_multiplier)
|
|
|
|
|
.set_is_incremental(false));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Write counter values to perfetto trace
|
|
|
|
|
for(auto& mitr : scratch_mem_endpoints)
|
|
|
|
|
{
|
|
|
|
|
if(scratch_mem_tracks.count(mitr.first) > 0)
|
|
|
|
|
{
|
|
|
|
|
for(auto itr : mitr.second)
|
|
|
|
|
{
|
|
|
|
|
TRACE_COUNTER(sdk::perfetto_category<sdk::category::scratch_memory>::name,
|
|
|
|
|
scratch_mem_tracks.at(mitr.first),
|
|
|
|
|
itr.first,
|
|
|
|
|
itr.second / bytes_multiplier);
|
|
|
|
|
tracing_session->FlushBlocking();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-11-07 01:15:19 -06:00
|
|
|
}
|
|
|
|
|
|
2025-02-20 10:14:37 +00:00
|
|
|
// Create counter tracks per agent
|
|
|
|
|
{
|
|
|
|
|
auto counters_endpoints = std::unordered_map<
|
|
|
|
|
rocprofiler_agent_id_t,
|
|
|
|
|
std::unordered_map<rocprofiler_counter_id_t, std::map<uint64_t, uint64_t>>>{};
|
|
|
|
|
|
|
|
|
|
auto counters_extremes = std::pair<uint64_t, uint64_t>{
|
|
|
|
|
std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::min()};
|
|
|
|
|
|
|
|
|
|
auto constexpr timestamp_buffer = 1000;
|
|
|
|
|
|
|
|
|
|
for(auto ditr : counter_collection_gen)
|
|
|
|
|
for(const auto& record : counter_collection_gen.get(ditr))
|
|
|
|
|
{
|
|
|
|
|
const auto& info = record.dispatch_data.dispatch_info;
|
|
|
|
|
|
|
|
|
|
const auto& start_timestamp = record.dispatch_data.start_timestamp;
|
|
|
|
|
const auto& end_timestamp = record.dispatch_data.end_timestamp;
|
|
|
|
|
|
|
|
|
|
uint64_t _mean_timestamp =
|
|
|
|
|
start_timestamp + (0.5 * (end_timestamp - start_timestamp));
|
|
|
|
|
|
2025-06-09 13:14:55 +05:30
|
|
|
auto corr_id = record.dispatch_data.correlation_id.internal;
|
|
|
|
|
auto it = dispatch_counter_id_value.find(corr_id);
|
|
|
|
|
if(it != dispatch_counter_id_value.end())
|
2025-02-20 10:14:37 +00:00
|
|
|
{
|
2025-06-09 13:14:55 +05:30
|
|
|
for(auto& [counter_id, counter_value] : it->second)
|
|
|
|
|
{
|
|
|
|
|
counters_endpoints[info.agent_id][counter_id].emplace(
|
|
|
|
|
start_timestamp - timestamp_buffer, 0);
|
|
|
|
|
counters_endpoints[info.agent_id][counter_id].emplace(start_timestamp,
|
|
|
|
|
counter_value);
|
|
|
|
|
counters_endpoints[info.agent_id][counter_id].emplace(_mean_timestamp,
|
|
|
|
|
counter_value);
|
|
|
|
|
counters_endpoints[info.agent_id][counter_id].emplace(end_timestamp, 0);
|
|
|
|
|
counters_endpoints[info.agent_id][counter_id].emplace(
|
|
|
|
|
end_timestamp + timestamp_buffer, 0);
|
|
|
|
|
}
|
2025-02-20 10:14:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
counters_extremes = std::make_pair(
|
|
|
|
|
std::min(counters_extremes.first, record.dispatch_data.start_timestamp),
|
|
|
|
|
std::max(counters_extremes.second, record.dispatch_data.end_timestamp));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto counter_tracks = std::unordered_map<rocprofiler_agent_id_t,
|
|
|
|
|
std::map<std::string, ::perfetto::CounterTrack>>{};
|
|
|
|
|
|
|
|
|
|
constexpr auto extremes_endpoint_buffer = 5000;
|
|
|
|
|
|
|
|
|
|
for(auto ditr : counter_collection_gen)
|
|
|
|
|
for(const auto& record : counter_collection_gen.get(ditr))
|
|
|
|
|
{
|
|
|
|
|
const auto& info = record.dispatch_data.dispatch_info;
|
|
|
|
|
const auto& sym = tool_metadata.get_kernel_symbol(info.kernel_id);
|
|
|
|
|
|
|
|
|
|
CHECK(sym != nullptr);
|
|
|
|
|
|
|
|
|
|
auto name = sym->formatted_kernel_name;
|
|
|
|
|
|
2025-06-09 13:14:55 +05:30
|
|
|
auto corr_id = record.dispatch_data.correlation_id.internal;
|
|
|
|
|
auto it = dispatch_counter_id_value.find(corr_id);
|
|
|
|
|
if(it != dispatch_counter_id_value.end())
|
2025-02-20 10:14:37 +00:00
|
|
|
{
|
2025-06-09 13:14:55 +05:30
|
|
|
for(auto& [counter_id, counter_value] : it->second)
|
2025-02-20 10:14:37 +00:00
|
|
|
{
|
2025-06-09 13:14:55 +05:30
|
|
|
counters_endpoints[info.agent_id][counter_id].emplace(
|
|
|
|
|
counters_extremes.first - extremes_endpoint_buffer, 0);
|
|
|
|
|
counters_endpoints[info.agent_id][counter_id].emplace(
|
|
|
|
|
counters_extremes.second + extremes_endpoint_buffer, 0);
|
|
|
|
|
|
|
|
|
|
auto agent_index_info =
|
|
|
|
|
tool_metadata.get_agent_index(info.agent_id, ocfg.agent_index_value);
|
|
|
|
|
auto track_name_ss = std::stringstream{};
|
|
|
|
|
track_name_ss << agent_index_info.label << " [" << agent_index_info.index
|
|
|
|
|
<< "] "
|
|
|
|
|
<< "PMC " << counter_id_to_name.at(counter_id);
|
|
|
|
|
|
|
|
|
|
auto track_name = track_name_ss.str();
|
|
|
|
|
|
|
|
|
|
counter_tracks[info.agent_id].emplace(
|
|
|
|
|
track_name, ::perfetto::CounterTrack(track_name.c_str()));
|
|
|
|
|
auto& endpoints = counters_endpoints[info.agent_id][counter_id];
|
|
|
|
|
for(auto& counter_itr : endpoints)
|
|
|
|
|
{
|
|
|
|
|
TRACE_COUNTER(
|
|
|
|
|
sdk::perfetto_category<sdk::category::counter_collection>::name,
|
|
|
|
|
counter_tracks[info.agent_id].at(track_name),
|
|
|
|
|
counter_itr.first,
|
|
|
|
|
counter_itr.second);
|
|
|
|
|
tracing_session->FlushBlocking();
|
|
|
|
|
}
|
2025-02-20 10:14:37 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-07 01:15:19 -06:00
|
|
|
::perfetto::TrackEvent::Flush();
|
|
|
|
|
tracing_session->FlushBlocking();
|
|
|
|
|
tracing_session->StopBlocking();
|
|
|
|
|
|
2026-01-27 10:27:54 -08:00
|
|
|
struct read_trace_state
|
|
|
|
|
{
|
|
|
|
|
std::mutex mtx{};
|
|
|
|
|
std::atomic<size_t> amount_read{0};
|
|
|
|
|
output_stream ofs{};
|
2024-11-07 01:15:19 -06:00
|
|
|
};
|
|
|
|
|
|
2026-01-27 10:27:54 -08:00
|
|
|
auto state = std::make_shared<read_trace_state>();
|
|
|
|
|
state->ofs = get_output_stream(ocfg, std::string{"results"}, ".pftrace");
|
|
|
|
|
|
2024-11-07 01:15:19 -06:00
|
|
|
for(size_t i = 0; i < 2; ++i)
|
|
|
|
|
{
|
|
|
|
|
ROCP_TRACE << "Reading trace...";
|
2026-01-27 10:27:54 -08:00
|
|
|
|
|
|
|
|
auto is_done = std::make_shared<std::promise<void>>();
|
|
|
|
|
auto _reader = [state, is_done](::perfetto::TracingSession::ReadTraceCallbackArgs _args) {
|
|
|
|
|
auto _lk = std::unique_lock<std::mutex>{state->mtx};
|
|
|
|
|
if(_args.data && _args.size > 0)
|
|
|
|
|
{
|
|
|
|
|
ROCP_TRACE << "Writing " << _args.size << " B to trace...";
|
|
|
|
|
// Write the trace data into file
|
|
|
|
|
state->ofs.stream->write(_args.data, _args.size);
|
|
|
|
|
state->amount_read += _args.size;
|
|
|
|
|
}
|
|
|
|
|
ROCP_INFO_IF(!_args.has_more && state->amount_read > 0)
|
|
|
|
|
<< "Wrote " << state->amount_read << " B to perfetto trace file";
|
|
|
|
|
if(!_args.has_more) is_done->set_value();
|
|
|
|
|
};
|
2024-11-07 01:15:19 -06:00
|
|
|
tracing_session->ReadTrace(_reader);
|
2026-01-27 10:27:54 -08:00
|
|
|
is_done->get_future().wait();
|
2024-11-07 01:15:19 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ROCP_TRACE << "Destroying tracing session...";
|
|
|
|
|
tracing_session.reset();
|
|
|
|
|
|
|
|
|
|
ROCP_TRACE << "Flushing trace output stream...";
|
2026-01-27 10:27:54 -08:00
|
|
|
(*state->ofs.stream) << std::flush;
|
2024-11-07 01:15:19 -06:00
|
|
|
|
|
|
|
|
ROCP_TRACE << "Destroying trace output stream...";
|
2026-01-27 10:27:54 -08:00
|
|
|
state->ofs.close();
|
2024-11-07 01:15:19 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} // namespace tool
|
|
|
|
|
} // namespace rocprofiler
|
|
|
|
|
|
|
|
|
|
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
|