ROCpd support [Part 2] (#109)
* Rocpd part 2, caching * Fix shadowed variables * backward compatibility * Fixed designated initializers * Fix timemory include * Remove benchmark & Fix build issues for rhel * Add missing bracket * Fix shadowing and pedantic * Fix pedantic pt2 * Fix duplicated SDK calls * Add decay in get_size_impl * Rename sample cache to trace cache * Add cache storage supported types * Resolving track naming in sampling module * fix sampling of flushing thread * fix sampling of flushing thread 2 * throw exception upon store while buffer storage is not running * Prevent fork crashing * Fix rebase issue * Applied suggestions from code review * Change flushing thread to use PTL * Fix agent creation order * Fix stream id ci throw * Remove force setup of rocprofiler-sdk * Code cleanup * Change initialization for agent * Add missing namespace * Fix the mismatch within the tool_agent->device_id * Switch from using handle to use agent type index * Fix pmc info comparator in metadata registry --------- Co-authored-by: Aleksandar <aleksandar.djordjevic@amd.com> Co-authored-by: Milan Radosavljevic <milan.radosavljevic@amd.com> Co-authored-by: Marjan Antic <marantic@amd.com>
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
351d598869
Коммит
1f86010ca2
@@ -224,9 +224,6 @@ rocprofiler_systems_add_option(ROCPROFSYS_INSTALL_PERFETTO_TOOLS
|
||||
rocprofiler_systems_add_option(ROCPROFILER_BUILD_SQLITE3
|
||||
"Enable building sqlite3 library internally" OFF
|
||||
)
|
||||
rocprofiler_systems_add_option(ROCPROFSYS_ENABLE_BENCHMARK
|
||||
"Enable performance benchmarking capabilities for the project" OFF
|
||||
)
|
||||
|
||||
if(ROCPROFSYS_USE_PAPI)
|
||||
rocprofiler_systems_add_option(ROCPROFSYS_BUILD_PAPI "Build PAPI from submodule" ON)
|
||||
@@ -334,10 +331,6 @@ if(ROCPROFSYS_BUILD_TESTING OR "$ENV{ROCPROFSYS_CI}" MATCHES "[1-9]+|ON|on|y|yes
|
||||
include(CTest)
|
||||
endif()
|
||||
|
||||
if(ROCPROFSYS_ENABLE_BENCHMARK)
|
||||
add_compile_definitions(-DROCPROFSYS_USE_BENCHMARK=1)
|
||||
endif()
|
||||
|
||||
# ------------------------------------------------------------------------------#
|
||||
#
|
||||
# library and executables
|
||||
|
||||
@@ -1,3 +1,25 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
#
|
||||
configure_file(
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/defines.hpp.in
|
||||
@@ -25,6 +47,8 @@ set(core_sources
|
||||
${CMAKE_CURRENT_LIST_DIR}/state.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/timemory.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/utility.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/agent_manager.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/node_info.cpp
|
||||
)
|
||||
|
||||
set(core_headers
|
||||
@@ -53,6 +77,9 @@ set(core_headers
|
||||
${CMAKE_CURRENT_LIST_DIR}/state.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/timemory.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/utility.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/agent.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/agent_manager.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/node_info.hpp
|
||||
)
|
||||
|
||||
add_library(rocprofiler-systems-core-library STATIC)
|
||||
@@ -69,6 +96,7 @@ add_subdirectory(binary)
|
||||
add_subdirectory(components)
|
||||
add_subdirectory(containers)
|
||||
add_subdirectory(rocpd)
|
||||
add_subdirectory(trace_cache)
|
||||
|
||||
target_include_directories(
|
||||
rocprofiler-systems-core-library
|
||||
@@ -94,6 +122,7 @@ target_link_libraries(
|
||||
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-compile-definitions>
|
||||
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-compile-options>
|
||||
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-perfetto>
|
||||
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-sqlite3>
|
||||
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-timemory>
|
||||
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-mpi>
|
||||
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-rocm>
|
||||
|
||||
@@ -43,7 +43,8 @@ enum class agent_type : uint8_t
|
||||
struct agent
|
||||
{
|
||||
agent_type type;
|
||||
uint64_t id;
|
||||
uint64_t handle;
|
||||
uint64_t device_id;
|
||||
uint32_t node_id;
|
||||
int32_t logical_node_id;
|
||||
int32_t logical_node_type_id;
|
||||
@@ -52,7 +53,7 @@ struct agent
|
||||
std::string vendor_name;
|
||||
std::string product_name;
|
||||
|
||||
size_t device_id{ 0 };
|
||||
size_t device_type_index{ 0 };
|
||||
size_t base_id{ 0 };
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
amdsmi_processor_handle smi_handle = nullptr;
|
||||
|
||||
@@ -44,11 +44,30 @@ agent_manager::insert_agent(agent& _agent)
|
||||
(_agent.type == agent_type::GPU ? _gpu_agents_cnt : _cpu_agents_cnt),
|
||||
(_agent.type == agent_type::GPU ? "GPU" : "CPU"));
|
||||
|
||||
_agent.device_id =
|
||||
_agent.device_type_index =
|
||||
(_agent.type == agent_type::GPU ? _gpu_agents_cnt++ : _cpu_agents_cnt++);
|
||||
_agents.emplace_back(std::make_shared<agent>(_agent));
|
||||
}
|
||||
|
||||
const agent&
|
||||
agent_manager::get_agent_by_type_index(size_t type_index, agent_type type) const
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(3, "Getting agent for type: %s, with type index: %ld\n",
|
||||
(type == agent_type::GPU) ? "GPU" : "CPU", type_index);
|
||||
auto _agent =
|
||||
std::find_if(_agents.begin(), _agents.end(), [&](const auto& agent_ptr) {
|
||||
return agent_ptr->type == type && agent_ptr->device_type_index == type_index;
|
||||
});
|
||||
if(_agent == _agents.end())
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Agent not found for type index: " << type_index
|
||||
<< ", type: " << (type == agent_type::GPU ? "GPU" : "CPU");
|
||||
throw std::out_of_range(oss.str());
|
||||
}
|
||||
return **_agent;
|
||||
}
|
||||
|
||||
const agent&
|
||||
agent_manager::get_agent_by_id(size_t device_id, agent_type type) const
|
||||
{
|
||||
@@ -75,7 +94,7 @@ agent_manager::get_agent_by_handle(uint64_t device_handle, agent_type type) cons
|
||||
device_handle, (type == agent_type::GPU ? "GPU" : "CPU"));
|
||||
auto _agent =
|
||||
std::find_if(_agents.begin(), _agents.end(), [&](const auto& agent_ptr) {
|
||||
return agent_ptr->type == type && agent_ptr->id == device_handle;
|
||||
return agent_ptr->type == type && agent_ptr->handle == device_handle;
|
||||
});
|
||||
if(_agent == _agents.end())
|
||||
{
|
||||
@@ -93,7 +112,7 @@ agent_manager::get_agent_by_handle(size_t device_handle) const
|
||||
ROCPROFSYS_VERBOSE(3, "Getting agent for device handle: %ld\n", device_handle);
|
||||
auto _agent =
|
||||
std::find_if(_agents.begin(), _agents.end(), [&](const auto& agent_ptr) {
|
||||
return agent_ptr->id == device_handle;
|
||||
return agent_ptr->handle == device_handle;
|
||||
});
|
||||
if(_agent == _agents.end())
|
||||
{
|
||||
|
||||
@@ -41,8 +41,9 @@ struct agent_manager
|
||||
~agent_manager() = default;
|
||||
|
||||
void insert_agent(agent& agent);
|
||||
const agent& get_agent_by_type_index(size_t type_index, agent_type type) const;
|
||||
const agent& get_agent_by_id(size_t device_id, agent_type type) const;
|
||||
const agent& get_agent_by_handle(size_t device_id, agent_type type) const;
|
||||
const agent& get_agent_by_handle(size_t device_handle, agent_type type) const;
|
||||
const agent& get_agent_by_handle(size_t device_handle) const;
|
||||
|
||||
std::vector<std::shared_ptr<agent>> get_agents_by_type(agent_type type) const;
|
||||
|
||||
@@ -1,353 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <chrono>
|
||||
#include <cstdlib>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <unistd.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "core/benchmark/category.hpp"
|
||||
#include "core/debug.hpp"
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace benchmark
|
||||
{
|
||||
namespace
|
||||
{
|
||||
template <bool enabled, typename category_enum, category_enum... enabled_categories>
|
||||
struct benchmark_impl
|
||||
{
|
||||
template <category_enum... categories>
|
||||
struct scope
|
||||
{
|
||||
scope(const scope&) = delete;
|
||||
scope& operator=(const scope&) = delete;
|
||||
~scope() = default;
|
||||
|
||||
protected:
|
||||
scope() = default;
|
||||
scope(scope&&) = default;
|
||||
scope& operator=(scope&&) = default;
|
||||
};
|
||||
|
||||
template <category_enum... categories>
|
||||
static void start()
|
||||
{}
|
||||
|
||||
template <category_enum... categories>
|
||||
static void end()
|
||||
{}
|
||||
|
||||
template <category_enum... categories>
|
||||
[[nodiscard]] static scope<categories...> scoped_trace()
|
||||
{
|
||||
return scope<categories...>{};
|
||||
}
|
||||
|
||||
static void init_from_env(const char* = nullptr) {}
|
||||
static void show_results() {}
|
||||
};
|
||||
|
||||
using tid_t = __pid_t;
|
||||
struct indexed_category
|
||||
{
|
||||
size_t category;
|
||||
tid_t thread_id;
|
||||
|
||||
friend bool operator==(const indexed_category& lhs, const indexed_category& rhs)
|
||||
{
|
||||
return lhs.category == rhs.category && lhs.thread_id == rhs.thread_id;
|
||||
}
|
||||
};
|
||||
|
||||
struct indexed_category_hash
|
||||
{
|
||||
size_t operator()(const indexed_category& p) const noexcept
|
||||
{
|
||||
std::size_t hash1 = std::hash<size_t>{}(p.category);
|
||||
std::size_t hash2 = std::hash<size_t>{}(p.thread_id);
|
||||
return hash1 ^ (hash2 << 1);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename category_enum, category_enum... enabled_categories>
|
||||
struct benchmark_impl<true, category_enum, enabled_categories...>
|
||||
{
|
||||
static_assert(std::is_enum_v<category_enum>, "category_enum must be an enum");
|
||||
|
||||
public:
|
||||
using clock = std::chrono::high_resolution_clock;
|
||||
using time_point = clock::time_point;
|
||||
static constexpr size_t _max_categories = static_cast<size_t>(category_enum::count);
|
||||
|
||||
template <category_enum... categories>
|
||||
struct scope
|
||||
{
|
||||
friend benchmark_impl;
|
||||
|
||||
public:
|
||||
scope(const scope&) = delete;
|
||||
scope& operator=(const scope&) = delete;
|
||||
~scope() { end<categories...>(); }
|
||||
|
||||
protected:
|
||||
scope() { start<categories...>(); }
|
||||
|
||||
scope(scope&&) = default;
|
||||
scope& operator=(scope&&) = default;
|
||||
};
|
||||
|
||||
template <category_enum... categories>
|
||||
static void start()
|
||||
{
|
||||
static const thread_local auto _thread_id = gettid();
|
||||
const auto now = clock::now();
|
||||
std::lock_guard lock(m_mutex);
|
||||
(..., (is_category_defined<categories>([&] {
|
||||
if(m_enabled.test(to_index(categories)))
|
||||
m_started[{ to_index(categories), _thread_id }] = now;
|
||||
})));
|
||||
}
|
||||
|
||||
template <category_enum... categories>
|
||||
static void end()
|
||||
{
|
||||
static const thread_local auto _thread_id = getpid();
|
||||
const auto _end_time = clock::now();
|
||||
std::lock_guard lock(m_mutex);
|
||||
(..., (is_category_defined<categories>([&] {
|
||||
if(m_enabled.test(to_index(categories)))
|
||||
end_category(_end_time, categories, _thread_id);
|
||||
})));
|
||||
}
|
||||
|
||||
template <category_enum... categories>
|
||||
[[nodiscard]] static scope<categories...> scoped_trace()
|
||||
{
|
||||
return scope<categories...>{};
|
||||
}
|
||||
|
||||
static void init_from_env(const char* envVar = "ROCPROFSYS_BENCHMARK_CATEGORIES")
|
||||
{
|
||||
std::lock_guard lock(m_mutex);
|
||||
const auto* env = std::getenv(envVar);
|
||||
if(env == nullptr || std::string(env).empty())
|
||||
{
|
||||
ROCPROFSYS_WARNING(1, "No BENCHMARK categories specified in environment "
|
||||
"variable ROCPROFSYS_BENCHMARK_CATEGORIES.\n");
|
||||
return;
|
||||
}
|
||||
std::string _str(env);
|
||||
std::istringstream ss(_str);
|
||||
std::string token;
|
||||
|
||||
while(std::getline(ss, token, ','))
|
||||
{
|
||||
token.erase(0, token.find_first_not_of(" \t"));
|
||||
token.erase(token.find_last_not_of(" \t") + 1);
|
||||
for(category_enum cat : compiledCategories)
|
||||
{
|
||||
if(to_string(cat) == token)
|
||||
{
|
||||
m_enabled.set(to_index(cat));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void show_results()
|
||||
{
|
||||
std::lock_guard lock(m_mutex);
|
||||
std::vector<std::pair<category_enum, result_data>> sorted;
|
||||
|
||||
for(category_enum cat : compiledCategories)
|
||||
{
|
||||
const auto& data = m_results[to_index(cat)];
|
||||
if(data.count > 0)
|
||||
{
|
||||
sorted.emplace_back(cat, data);
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(sorted.begin(), sorted.end(), [](const auto& a, const auto& b) {
|
||||
return a.second.total_time > b.second.total_time;
|
||||
});
|
||||
|
||||
constexpr uint32_t _category = 30;
|
||||
constexpr uint32_t _calls = 8;
|
||||
constexpr uint32_t _total = 12;
|
||||
constexpr uint32_t _avg = 10;
|
||||
constexpr uint32_t _min = 10;
|
||||
constexpr uint32_t _max = 10;
|
||||
|
||||
std::cout << "\033[32m"
|
||||
<< std::string(_category + _calls + _total + _avg + _min + _max, '=')
|
||||
<< "\n";
|
||||
std::cout << "Benchmark Results (Sorted by Total Time):\n";
|
||||
std::cout << std::string(_category + _calls + _total + _avg + _min + _max, '-')
|
||||
<< "\n";
|
||||
std::cout << std::left << std::setw(_category) << "Category" << std::right
|
||||
<< std::setw(_calls) << "Calls" << std::setw(_total) << "Total(ms)"
|
||||
<< std::setw(_avg) << "Avg(us)" << std::setw(_min) << "Min(us)"
|
||||
<< std::setw(_max) << "Max(us)" << "\n";
|
||||
|
||||
std::cout << std::string(_category + _calls + _total + _avg + _min + _max, '-')
|
||||
<< "\n";
|
||||
|
||||
for(const auto& [cat, data] : sorted)
|
||||
{
|
||||
double totalMs = static_cast<double>(data.total_time) / 1000.0;
|
||||
double avgUs = static_cast<double>(data.total_time) / data.count;
|
||||
|
||||
std::cout << std::left << std::setw(_category) << to_string(cat) << std::right
|
||||
<< std::setw(_calls) << data.count << std::setw(_total)
|
||||
<< std::fixed << std::setprecision(3) << totalMs << std::setw(_avg)
|
||||
<< std::fixed << std::setprecision(1) << avgUs << std::setw(_min)
|
||||
<< data.min_time << std::setw(_max) << data.max_time << "\n";
|
||||
}
|
||||
|
||||
std::cout << std::string(_category + _calls + _total + _avg + _min + _max, '=')
|
||||
<< "\033[0m" << "\n\n";
|
||||
}
|
||||
|
||||
private:
|
||||
struct result_data
|
||||
{
|
||||
uint64_t total_time = 0;
|
||||
size_t count = 0;
|
||||
uint64_t min_time = std::numeric_limits<uint64_t>::max();
|
||||
uint64_t max_time = std::numeric_limits<uint64_t>::min();
|
||||
|
||||
void update(uint64_t duration)
|
||||
{
|
||||
total_time += duration;
|
||||
count += 1;
|
||||
if(duration < min_time) min_time = duration;
|
||||
if(duration > max_time) max_time = duration;
|
||||
}
|
||||
};
|
||||
|
||||
static constexpr size_t to_index(category_enum cat)
|
||||
{
|
||||
return static_cast<size_t>(cat);
|
||||
}
|
||||
|
||||
static void end_category(const time_point& end_time, category_enum cat,
|
||||
const tid_t thread_id)
|
||||
{
|
||||
const size_t _idx = to_index(cat);
|
||||
auto _it = m_started.find({ _idx, thread_id });
|
||||
if(_it == m_started.end())
|
||||
{
|
||||
ROCPROFSYS_WARNING(1, "Benchmark error: missing start time for category!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
auto duration =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(end_time - _it->second)
|
||||
.count();
|
||||
m_started.erase(_it);
|
||||
m_results[_idx].update(duration);
|
||||
}
|
||||
|
||||
template <category_enum Cat, typename Func>
|
||||
static constexpr void is_category_defined(Func&& f)
|
||||
{
|
||||
if constexpr(((Cat == enabled_categories) || ...))
|
||||
{
|
||||
f();
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr std::array<category_enum, sizeof...(enabled_categories)>
|
||||
compiledCategories = { enabled_categories... };
|
||||
|
||||
static inline std::unordered_map<indexed_category, time_point, indexed_category_hash>
|
||||
m_started;
|
||||
static inline std::array<result_data, _max_categories> m_results{};
|
||||
static inline std::bitset<_max_categories> m_enabled;
|
||||
static inline std::mutex m_mutex;
|
||||
};
|
||||
|
||||
#ifdef ROCPROFSYS_ENABLE_BENCHMARK
|
||||
using _benchmark_impl = benchmark::benchmark_impl<
|
||||
static_cast<bool>(ROCPROFSYS_ENABLE_BENCHMARK), benchmark::category,
|
||||
benchmark::category::kernel_dispatch, benchmark::category::memory_copy,
|
||||
benchmark::category::memory_allocate, benchmark::category::db_entry_kernel_dispatch,
|
||||
benchmark::category::db_entry_memory_copy,
|
||||
benchmark::category::db_entry_memory_allocate,
|
||||
benchmark::category::perfetto_kernel_dispatch,
|
||||
benchmark::category::sdk_tool_buffered_tracing>;
|
||||
#else
|
||||
using _benchmark_impl = benchmark::benchmark_impl<false, benchmark::category>;
|
||||
#endif
|
||||
} // namespace
|
||||
|
||||
template <category... categories>
|
||||
void
|
||||
start()
|
||||
{
|
||||
_benchmark_impl::template start<categories...>();
|
||||
}
|
||||
|
||||
template <category... categories>
|
||||
void
|
||||
end()
|
||||
{
|
||||
_benchmark_impl::template end<categories...>();
|
||||
}
|
||||
|
||||
template <category... categories>
|
||||
[[nodiscard]] auto
|
||||
scoped_trace()
|
||||
{
|
||||
return _benchmark_impl::template scoped_trace<categories...>();
|
||||
}
|
||||
|
||||
inline void
|
||||
init_from_env(const char* envVar = "BENCHMARK_CATEGORIES")
|
||||
{
|
||||
_benchmark_impl::init_from_env(envVar);
|
||||
}
|
||||
|
||||
inline void
|
||||
show_results()
|
||||
{
|
||||
_benchmark_impl::show_results();
|
||||
}
|
||||
|
||||
} // namespace benchmark
|
||||
} // namespace rocprofsys
|
||||
@@ -1,68 +0,0 @@
|
||||
// Copyright (c) 2018-2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// with the Software without restriction, including without limitation the
|
||||
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
// sell copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
//
|
||||
// * Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this Software without specific prior written permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string_view>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace benchmark
|
||||
{
|
||||
enum class category
|
||||
{
|
||||
kernel_dispatch,
|
||||
db_entry_kernel_dispatch,
|
||||
memory_copy,
|
||||
db_entry_memory_copy,
|
||||
memory_allocate,
|
||||
db_entry_memory_allocate,
|
||||
perfetto_kernel_dispatch,
|
||||
sdk_tool_buffered_tracing,
|
||||
count
|
||||
};
|
||||
|
||||
constexpr std::string_view
|
||||
to_string(category cat)
|
||||
{
|
||||
switch(cat)
|
||||
{
|
||||
case category::kernel_dispatch: return "kernel_dispatch";
|
||||
case category::db_entry_kernel_dispatch: return "db_entry_kernel_dispatch";
|
||||
case category::memory_copy: return "memory_copy";
|
||||
case category::memory_allocate: return "memory_allocate";
|
||||
case category::db_entry_memory_copy: return "db_entry_memory_copy";
|
||||
case category::db_entry_memory_allocate: return "db_entry_memory_allocate";
|
||||
case category::perfetto_kernel_dispatch: return "perfetto_kernel_dispatch";
|
||||
case category::sdk_tool_buffered_tracing: return "sdk_tool_buffered_tracing";
|
||||
default: return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace benchmark
|
||||
} // namespace rocprofsys
|
||||
@@ -96,6 +96,7 @@ ROCPROFSYS_DEFINE_CATEGORY(category, rocm_hip_api, ROCPROFSYS_CATEGORY_ROCM_HIP_
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_hsa_api, ROCPROFSYS_CATEGORY_ROCM_HSA_API, "rocm_hsa_api", "ROCm HSA functions")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_kernel_dispatch, ROCPROFSYS_CATEGORY_ROCM_KERNEL_DISPATCH, "rocm_kernel_dispatch", "ROCm Kernel dispatch")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_memory_copy, ROCPROFSYS_CATEGORY_ROCM_MEMORY_COPY, "rocm_memory_copy", "ROCm Async Memory Copy")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_memory_allocate, ROCPROFSYS_CATEGORY_ROCM_MEMORY_ALLOCATE, "rocm_memory_allocate", "ROCm Memory Allocations")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_hip_stream, ROCPROFSYS_CATEGORY_ROCM_HIP_STREAM, "rocm_hip_stream", "ROCm HIP Stream")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_scratch_memory, ROCPROFSYS_CATEGORY_ROCM_SCRATCH_MEMORY, "rocm_scratch_memory", "ROCm kernel scratch memory reallocations")
|
||||
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_page_migration, ROCPROFSYS_CATEGORY_ROCM_PAGE_MIGRATION, "rocm_page_migration", "ROCm memory page migration")
|
||||
@@ -167,6 +168,7 @@ using name = perfetto_category<Tp...>;
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_hsa_api), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_kernel_dispatch), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_memory_copy), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_memory_allocate), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_hip_stream), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_scratch_memory), \
|
||||
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_page_migration), \
|
||||
|
||||
@@ -1,3 +1,25 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "cpu.hpp"
|
||||
#include "agent_manager.hpp"
|
||||
|
||||
@@ -153,6 +175,7 @@ query_cpu_agents()
|
||||
auto logical_id = id_count++;
|
||||
auto id = cpu_count++;
|
||||
auto cur_agent = agent{ agent_type::CPU,
|
||||
0,
|
||||
id,
|
||||
node_id,
|
||||
logical_id,
|
||||
|
||||
@@ -124,19 +124,20 @@ query_rocm_agents()
|
||||
auto& _agent_manager = agent_manager::get_instance();
|
||||
for(size_t i = 0; i < num_agents; ++i)
|
||||
{
|
||||
const auto* _agent = static_cast<const rocprofiler_agent_v0_t*>(agents[i]);
|
||||
auto cur_agent = agent{
|
||||
const auto* _agent = static_cast<const rocprofiler_agent_v0_t*>(agents[i]);
|
||||
agent cur_agent;
|
||||
cur_agent.type =
|
||||
(_agent->type == ROCPROFILER_AGENT_TYPE_GPU ? agent_type::GPU
|
||||
: agent_type::CPU),
|
||||
_agent->device_id,
|
||||
_agent->node_id,
|
||||
_agent->logical_node_id,
|
||||
_agent->logical_node_type_id,
|
||||
std::string(_agent->name),
|
||||
std::string(_agent->vendor_name),
|
||||
std::string(_agent->product_name),
|
||||
std::string(_agent->model_name),
|
||||
};
|
||||
: agent_type::CPU);
|
||||
cur_agent.handle = _agent->id.handle;
|
||||
cur_agent.device_id = _agent->device_id;
|
||||
cur_agent.node_id = _agent->node_id;
|
||||
cur_agent.logical_node_id = _agent->logical_node_id;
|
||||
cur_agent.logical_node_type_id = _agent->logical_node_type_id;
|
||||
cur_agent.name = std::string(_agent->name);
|
||||
cur_agent.model_name = std::string(_agent->model_name);
|
||||
cur_agent.vendor_name = std::string(_agent->vendor_name);
|
||||
cur_agent.product_name = std::string(_agent->product_name);
|
||||
_agent_manager.insert_agent(cur_agent);
|
||||
}
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
|
||||
@@ -1,3 +1,25 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
set(rocpd_sources
|
||||
${CMAKE_CURRENT_LIST_DIR}/data_processor.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/json.cpp
|
||||
|
||||
@@ -69,8 +69,7 @@ data_processor::initialize_metadata()
|
||||
size_t
|
||||
data_processor::insert_string(const char* str)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
auto it = _string_map.find(str);
|
||||
auto it = _string_map.find(str);
|
||||
if(it != _string_map.end()) return _string_map.at(str);
|
||||
|
||||
data_storage::queries::table_insert_query query;
|
||||
@@ -242,24 +241,12 @@ data_processor::insert_sample(const char* track, uint64_t timestamp, size_t even
|
||||
}
|
||||
|
||||
size_t
|
||||
data_processor::insert_event(size_t category_id, size_t stack_id, size_t parent_stack_id,
|
||||
size_t correlation_id, const char* call_stack,
|
||||
const char* line_info, const char* extdata)
|
||||
data_processor::insert_event(size_t string_primary_key, size_t stack_id,
|
||||
size_t parent_stack_id, size_t correlation_id,
|
||||
const char* call_stack, const char* line_info,
|
||||
const char* extdata)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
auto it = _category_map.find(category_id);
|
||||
if(it == _category_map.end())
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "Insert event failed! Error: Unknown category id: " << category_id
|
||||
<< " for UPID: " << _upid;
|
||||
throw std::runtime_error(oss.str());
|
||||
}
|
||||
|
||||
ROCPROFSYS_VERBOSE(3, "Insert event category id: %ld, string id: %ld\n", category_id,
|
||||
it->second);
|
||||
|
||||
_insert_event_statement(_upid.c_str(), it->second, stack_id, parent_stack_id,
|
||||
_insert_event_statement(_upid.c_str(), string_primary_key, stack_id, parent_stack_id,
|
||||
correlation_id, call_stack, line_info, extdata);
|
||||
return data_storage::database::get_instance().get_last_insert_id();
|
||||
}
|
||||
@@ -456,7 +443,6 @@ void
|
||||
data_processor::insert_args(size_t event_id, size_t position, const char* type,
|
||||
const char* name, const char* value, const char* extdata)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
_insert_args_statement(_upid.c_str(), event_id, position, type, name, value, extdata);
|
||||
}
|
||||
|
||||
@@ -464,40 +450,24 @@ void
|
||||
data_processor::insert_stream_info(size_t stream_id, size_t node_id, size_t process_id,
|
||||
const char* name, const char* extdata)
|
||||
{
|
||||
if(_stream_ids.count(stream_id) > 0)
|
||||
{
|
||||
// ROCPROFSYS_WARNING(
|
||||
// 1, "Insert stream info failed! Error: Stream ID %ld already exists!\n",
|
||||
// stream_id);
|
||||
return;
|
||||
}
|
||||
data_storage::queries::table_insert_query query;
|
||||
data_storage::database::get_instance().execute_query(
|
||||
query.set_table_name("rocpd_info_stream_" + _upid)
|
||||
.set_columns("id", "guid", "nid", "pid", "name", "extdata")
|
||||
.set_values(stream_id, _upid, node_id, process_id, name, extdata)
|
||||
.get_query_string());
|
||||
_stream_ids.insert(stream_id);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_queue_info(size_t queue_id, size_t node_id, size_t process_id,
|
||||
const char* name, const char* extdata)
|
||||
{
|
||||
if(_queue_ids.count(queue_id) > 0)
|
||||
{
|
||||
// ROCPROFSYS_WARNING(
|
||||
// 1, "Insert queue info failed! Error: Queue ID %ld already exists!\n",
|
||||
// queue_id);
|
||||
return;
|
||||
}
|
||||
data_storage::queries::table_insert_query query;
|
||||
data_storage::database::get_instance().execute_query(
|
||||
query.set_table_name("rocpd_info_queue_" + _upid)
|
||||
.set_columns("id", "guid", "nid", "pid", "name", "extdata")
|
||||
.set_values(queue_id, _upid, node_id, process_id, name, extdata)
|
||||
.get_query_string());
|
||||
_queue_ids.insert(queue_id);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -506,20 +476,9 @@ data_processor::insert_code_object(size_t id, size_t node_id, size_t process_id,
|
||||
uint64_t ld_size, uint64_t ld_delta,
|
||||
const char* storage_type, const char* extdata)
|
||||
{
|
||||
if(_code_object_ids.count(id) > 0)
|
||||
{
|
||||
// ROCPROFSYS_WARNING(
|
||||
// 1,
|
||||
// "Insert code object info failed! Error: Code object ID %ld already
|
||||
// exists!\n", id);
|
||||
return;
|
||||
}
|
||||
ROCPROFSYS_VERBOSE(2, "Insert code object with ID: %ld\n", id);
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
_insert_code_object_statement(id, _upid.c_str(), node_id, process_id, agent_id, uri,
|
||||
ld_base, ld_size, ld_delta, storage_type, extdata);
|
||||
|
||||
_code_object_ids.insert(id);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -530,40 +489,11 @@ data_processor::insert_kernel_symbol(
|
||||
uint32_t private_segment_size, uint32_t sgrp_count, uint32_t arch_vgrp_count,
|
||||
uint32_t accum_vgrp_count, const char* extdata)
|
||||
{
|
||||
if(_kernel_sym_ids.count(id) > 0)
|
||||
{
|
||||
// ROCPROFSYS_WARNING(
|
||||
// 1,
|
||||
// "Insert kernel symbol failed! Error: Kernel symbol ID %ld already
|
||||
// exists!\n", id);
|
||||
return;
|
||||
}
|
||||
|
||||
ROCPROFSYS_VERBOSE(2, "Insert kernel symbol: %s with ID: %ld\n", name, id);
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
_insert_kernel_symbol_statement(
|
||||
id, _upid.c_str(), node_id, process_id, code_obj_id, name, display_name,
|
||||
kernel_obj, kernarg_segmnt_size, kernarg_segment_alignment, group_segment_size,
|
||||
private_segment_size, sgrp_count, arch_vgrp_count, accum_vgrp_count, extdata);
|
||||
|
||||
_kernel_sym_ids.insert(id);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::insert_category(size_t category_id, const char* name)
|
||||
{
|
||||
auto it = _category_map.find(category_id);
|
||||
if(it != _category_map.end())
|
||||
{
|
||||
// ROCPROFSYS_WARNING(
|
||||
// 1, "Insert category failed! Error: Category %s already exist!\n", name);
|
||||
return;
|
||||
}
|
||||
auto name_id = insert_string(name);
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
ROCPROFSYS_VERBOSE(2, "Insert category: name: %s, id: %ld, name id: %ld\n", name,
|
||||
category_id, name_id);
|
||||
_category_map.emplace(category_id, name_id);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -571,7 +501,6 @@ data_processor::insert_region(size_t node_id, size_t process_id, size_t thread_i
|
||||
uint64_t start, uint64_t end, size_t name_id,
|
||||
size_t event_id, const char* extdata)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
ROCPROFSYS_VERBOSE(2, "Insert region for event id: %ld\n", event_id);
|
||||
|
||||
_insert_region_statement(_upid.c_str(), node_id, process_id, thread_id, start, end,
|
||||
@@ -587,8 +516,6 @@ data_processor::insert_kernel_dispatch(
|
||||
size_t grid_size_x, size_t grid_size_y, size_t grid_size_z, size_t region_name_id,
|
||||
size_t event_id, const char* extdata)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
|
||||
ROCPROFSYS_VERBOSE(2, "Insert kernel dispatch for event id: %ld\n", event_id);
|
||||
|
||||
_insert_kernel_dispatch_statement(
|
||||
@@ -607,8 +534,6 @@ data_processor::insert_memory_copy(size_t node_id, size_t process_id, size_t thr
|
||||
size_t region_name_id, size_t event_id,
|
||||
const char* extdata)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(_data_mutex);
|
||||
|
||||
_insert_memory_copy_statement(_upid.c_str(), node_id, process_id, thread_id, start,
|
||||
end, name_id, dst_agent_id, dst_addr, src_agent_id,
|
||||
src_addr, size, queue_id, stream_id, region_name_id,
|
||||
@@ -663,6 +588,18 @@ data_processor::insert_thread_info(size_t node_id, size_t parent_process_id,
|
||||
return thread_idx;
|
||||
}
|
||||
|
||||
size_t
|
||||
data_processor::map_thread_id_to_primary_key(size_t thread_id)
|
||||
{
|
||||
auto it = _thread_id_map.find(thread_id);
|
||||
|
||||
if(it == _thread_id_map.end())
|
||||
{
|
||||
throw std::invalid_argument("Given thread id don't exist");
|
||||
}
|
||||
return _thread_id_map.at(thread_id);
|
||||
}
|
||||
|
||||
void
|
||||
data_processor::flush()
|
||||
{
|
||||
|
||||
@@ -39,7 +39,7 @@ struct data_processor
|
||||
using insert_event_stmt =
|
||||
std::function<void(const char*, size_t, size_t, size_t, size_t, const char*,
|
||||
const char*, const char*)>;
|
||||
using insert_pmc_event_stms =
|
||||
using insert_pmc_event_stmt =
|
||||
std::function<void(const char*, size_t, size_t, double, const char*)>;
|
||||
using insert_sample_stmt =
|
||||
std::function<void(const char*, size_t, uint64_t, size_t, const char*)>;
|
||||
@@ -124,9 +124,10 @@ public:
|
||||
void insert_track(const char* track_name, size_t node_id, size_t process_id,
|
||||
std::optional<size_t> thread_id, const char* extdata = "{}");
|
||||
|
||||
size_t insert_event(size_t category_id, size_t stack_id, size_t parent_stack_id,
|
||||
size_t correlation_id, const char* call_stack = "{}",
|
||||
const char* line_info = "{}", const char* extdata = "{}");
|
||||
size_t insert_event(size_t string_primary_key, size_t stack_id,
|
||||
size_t parent_stack_id, size_t correlation_id,
|
||||
const char* call_stack = "{}", const char* line_info = "{}",
|
||||
const char* extdata = "{}");
|
||||
|
||||
void insert_pmc_event(size_t event_id, size_t agent_id, const char* pmc_descriptor,
|
||||
double value, const char* extdata = "{}");
|
||||
@@ -143,8 +144,6 @@ public:
|
||||
void insert_sample(const char* track, uint64_t timestamp, size_t event_id,
|
||||
const char* extdata = "{}");
|
||||
|
||||
void insert_category(size_t category_id, const char* name);
|
||||
|
||||
void insert_region(size_t node_id, size_t process_id, size_t thread_id,
|
||||
uint64_t start, uint64_t end, size_t name_id, size_t event_id,
|
||||
const char* extdata = "{}");
|
||||
@@ -199,6 +198,8 @@ public:
|
||||
size_t stream_id, size_t event_id,
|
||||
const char* extdata = "{}");
|
||||
|
||||
size_t map_thread_id_to_primary_key(size_t thread_id);
|
||||
|
||||
void flush();
|
||||
|
||||
private:
|
||||
@@ -223,16 +224,10 @@ private:
|
||||
std::unordered_map<pmc_identifier, size_t, pmc_identifier_hash, pmc_identifier_equal>
|
||||
_pmc_descriptor_map;
|
||||
std::unordered_map<size_t, size_t> _thread_id_map;
|
||||
std::unordered_map<size_t, size_t> _category_map;
|
||||
std::unordered_map<std::string, size_t> _string_map;
|
||||
|
||||
std::set<uint64_t> _code_object_ids;
|
||||
std::set<uint64_t> _kernel_sym_ids;
|
||||
std::set<uint64_t> _stream_ids;
|
||||
std::set<uint64_t> _queue_ids;
|
||||
|
||||
insert_event_stmt _insert_event_statement;
|
||||
insert_pmc_event_stms _insert_pmc_event_statement;
|
||||
insert_pmc_event_stmt _insert_pmc_event_statement;
|
||||
insert_sample_stmt _insert_sample_statement;
|
||||
insert_region_stmt _insert_region_statement;
|
||||
insert_kernel_dispatch_stmt _insert_kernel_dispatch_statement;
|
||||
@@ -244,8 +239,6 @@ private:
|
||||
insert_memory_alloc_no_agent_stmt _insert_memory_alloc_no_agent_statement;
|
||||
|
||||
std::string _upid{};
|
||||
|
||||
std::mutex _data_mutex;
|
||||
};
|
||||
|
||||
} // namespace rocpd
|
||||
|
||||
@@ -93,9 +93,8 @@ database::initialize_schema()
|
||||
return new_file_path;
|
||||
}
|
||||
}
|
||||
return std::string(
|
||||
"rocprofiler-systems/source/lib/core/rocpd/data_storage/schema/")
|
||||
.append(filename);
|
||||
// TODO: Update to look for the system's rocpd schema
|
||||
return std::string("source/lib/core/rocpd/data_storage/schema/").append(filename);
|
||||
};
|
||||
|
||||
std::vector<std::string_view> schema_files = { "rocpd_tables.sql", "rocpd_views.sql",
|
||||
@@ -118,9 +117,11 @@ database::initialize_schema()
|
||||
std::string query = ss_query.str();
|
||||
|
||||
std::regex upid_pattern("\\{\\{uuid\\}\\}");
|
||||
std::regex guid_pattern("\\{\\{guid\\}\\}");
|
||||
std::regex view_upid_pattern("\\{\\{view_upid\\}\\}");
|
||||
|
||||
query = std::regex_replace(query, upid_pattern, "_" + get_upid());
|
||||
query = std::regex_replace(query, guid_pattern, get_upid());
|
||||
query = std::regex_replace(query, view_upid_pattern, "");
|
||||
|
||||
validate_sqlite3_result(
|
||||
|
||||
@@ -495,6 +495,9 @@ get_buffered_domains()
|
||||
const auto supported = std::unordered_set<rocprofiler_buffer_tracing_kind_t>{
|
||||
ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH,
|
||||
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
|
||||
# if(ROCPROFILER_VERSION >= 600)
|
||||
ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION,
|
||||
# endif
|
||||
# if(ROCPROFILER_VERSION < 10000)
|
||||
ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION,
|
||||
# endif
|
||||
@@ -540,6 +543,16 @@ get_buffered_domains()
|
||||
{
|
||||
_data.emplace(ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API);
|
||||
}
|
||||
# if(ROCPROFILER_VERSION >= 600)
|
||||
else if(itr == "memory_allocation")
|
||||
{
|
||||
_data.emplace(ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION);
|
||||
}
|
||||
# endif
|
||||
else if(itr == "memory_copy")
|
||||
{
|
||||
_data.emplace(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY);
|
||||
}
|
||||
else
|
||||
{
|
||||
for(size_t idx = 0; idx < buffer_tracing_info.size(); ++idx)
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
set(trace_cache_sources
|
||||
${CMAKE_CURRENT_LIST_DIR}/cache_manager.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/storage_parser.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/buffer_storage.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/metadata_registry.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rocpd_post_processing.cpp
|
||||
)
|
||||
|
||||
set(trace_cache_headers
|
||||
${CMAKE_CURRENT_LIST_DIR}/cache_manager.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/storage_parser.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/buffer_storage.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/cache_utility.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/metadata_registry.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rocpd_post_processing.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/sample_type.hpp
|
||||
)
|
||||
|
||||
target_sources(
|
||||
rocprofiler-systems-core-library
|
||||
PRIVATE ${trace_cache_sources} ${trace_cache_headers}
|
||||
)
|
||||
@@ -0,0 +1,178 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "buffer_storage.hpp"
|
||||
#include "PTL/Task.hh"
|
||||
#include "PTL/TaskGroup.hh"
|
||||
#include "PTL/ThreadPool.hh"
|
||||
#include "debug.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
#include <chrono>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
#include <unistd.h>
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
constexpr auto CACHE_FILE_FLUSH_TIMEOUT = 10ms;
|
||||
constexpr auto NUM_OF_THREADS = 1;
|
||||
} // namespace
|
||||
|
||||
buffer_storage::buffer_storage(pid_t _pid)
|
||||
{
|
||||
ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
m_thread_pool = std::make_unique<PTL::ThreadPool>(NUM_OF_THREADS);
|
||||
m_thread_pool->initialize_threadpool(NUM_OF_THREADS);
|
||||
|
||||
m_task_group = std::make_unique<PTL::TaskGroup<void>>(m_thread_pool.get());
|
||||
m_task_group->exec([this, _pid]() {
|
||||
std::ofstream _ofs(filename, std::ios::binary | std::ios::out);
|
||||
|
||||
if(!_ofs)
|
||||
{
|
||||
std::stringstream _ss;
|
||||
_ss << "Error opening file for writing: " << filename;
|
||||
throw std::runtime_error(_ss.str());
|
||||
}
|
||||
|
||||
auto execute_flush = [&](std::ofstream& ofs, bool force = false) {
|
||||
size_t _head, _tail;
|
||||
{
|
||||
std::lock_guard guard{ m_mutex };
|
||||
_head = m_head;
|
||||
_tail = m_tail;
|
||||
|
||||
if(_head == _tail)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
auto used_space =
|
||||
m_head > m_tail ? (m_head - m_tail) : (buffer_size - m_tail + m_head);
|
||||
if(!force && used_space < flush_threshold)
|
||||
{
|
||||
return;
|
||||
}
|
||||
m_tail = m_head;
|
||||
}
|
||||
|
||||
if(_head > _tail)
|
||||
{
|
||||
ofs.write(reinterpret_cast<const char*>(m_buffer->data() + _tail),
|
||||
_head - _tail);
|
||||
}
|
||||
else
|
||||
{
|
||||
ofs.write(reinterpret_cast<const char*>(m_buffer->data() + _tail),
|
||||
buffer_size - _tail);
|
||||
ofs.write(reinterpret_cast<const char*>(m_buffer->data()), _head);
|
||||
}
|
||||
};
|
||||
|
||||
ROCPROFSYS_DEBUG("Starting buffered storage flushing thread for pid %d",
|
||||
static_cast<int>(_pid));
|
||||
m_created_process = _pid;
|
||||
std::mutex _shutdown_condition_mutex;
|
||||
while(m_running)
|
||||
{
|
||||
execute_flush(_ofs);
|
||||
std::unique_lock _lock{ _shutdown_condition_mutex };
|
||||
m_shutdown_condition.wait_for(
|
||||
_lock, std::chrono::milliseconds(CACHE_FILE_FLUSH_TIMEOUT),
|
||||
[&]() { return !m_running; });
|
||||
}
|
||||
|
||||
execute_flush(_ofs, true);
|
||||
_ofs.close();
|
||||
m_exit_finished = true;
|
||||
m_exit_condition.notify_one();
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
buffer_storage::shutdown()
|
||||
{
|
||||
ROCPROFSYS_DEBUG("Buffer storage shutting down..");
|
||||
m_running = false;
|
||||
m_shutdown_condition.notify_all();
|
||||
|
||||
if(m_created_process != getpid())
|
||||
{
|
||||
ROCPROFSYS_DEBUG(
|
||||
"Buffer storage is not created in same process as shutting down..");
|
||||
return;
|
||||
}
|
||||
|
||||
std::mutex _exit_mutex;
|
||||
std::unique_lock _exit_lock{ _exit_mutex };
|
||||
m_exit_condition.wait(_exit_lock, [&]() { return m_exit_finished; });
|
||||
m_thread_pool->destroy_threadpool();
|
||||
}
|
||||
|
||||
void
|
||||
buffer_storage::fragment_memory()
|
||||
{
|
||||
auto* _data = m_buffer->data();
|
||||
memset(_data + m_head, 0xFFFF, buffer_size - m_head);
|
||||
*reinterpret_cast<entry_type*>(_data + m_head) = entry_type::fragmented_space;
|
||||
|
||||
size_t remaining_bytes = buffer_size - m_head - minimal_fragmented_memory_size;
|
||||
*reinterpret_cast<size_t*>(_data + m_head + sizeof(entry_type)) = remaining_bytes;
|
||||
m_head = 0;
|
||||
}
|
||||
|
||||
uint8_t*
|
||||
buffer_storage::reserve_memory_space(size_t len)
|
||||
{
|
||||
size_t _size;
|
||||
{
|
||||
std::lock_guard scope{ m_mutex };
|
||||
|
||||
if((m_head + len + minimal_fragmented_memory_size) > buffer_size)
|
||||
{
|
||||
fragment_memory();
|
||||
}
|
||||
_size = m_head;
|
||||
m_head = m_head + len;
|
||||
}
|
||||
|
||||
auto* _result = m_buffer->data() + _size;
|
||||
memset(_result, 0, len);
|
||||
return _result;
|
||||
}
|
||||
|
||||
bool
|
||||
buffer_storage::is_running() const
|
||||
{
|
||||
return m_running;
|
||||
}
|
||||
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,162 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "PTL/TaskGroup.hh"
|
||||
#include "PTL/ThreadPool.hh"
|
||||
#include "cache_utility.hpp"
|
||||
#include "sample_type.hpp"
|
||||
#include <cassert>
|
||||
#include <condition_variable>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <thread>
|
||||
#include <type_traits>
|
||||
#include <PTL/PTL.hh>
|
||||
#include <unistd.h>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
class cache_manager;
|
||||
class buffer_storage
|
||||
{
|
||||
public:
|
||||
static buffer_storage& get_instance();
|
||||
|
||||
template <typename... T>
|
||||
void store(entry_type type, T&&... values)
|
||||
{
|
||||
if(!is_running())
|
||||
{
|
||||
throw std::runtime_error(
|
||||
"Trying to use buffered storage while it is not running");
|
||||
return;
|
||||
}
|
||||
|
||||
constexpr bool is_supported_type = (supported_types::is_supported<T> && ...);
|
||||
static_assert(is_supported_type, "Supported types are const char*, char*, "
|
||||
"unsigned long, unsigned int, and int.");
|
||||
|
||||
auto arg_size = get_size(values...);
|
||||
auto total_size = arg_size + sizeof(type) + sizeof(size_t);
|
||||
auto* reserved_memory = reserve_memory_space(total_size);
|
||||
size_t position = 0;
|
||||
|
||||
auto store_value = [&](const auto& val) {
|
||||
using Type = decltype(val);
|
||||
size_t len = 0;
|
||||
auto* dest = reserved_memory + position;
|
||||
if constexpr(std::is_same_v<std::decay_t<Type>, const char*>)
|
||||
{
|
||||
len = strlen(val) + 1;
|
||||
std::memcpy(dest, val, len);
|
||||
}
|
||||
else
|
||||
{
|
||||
using ClearType = std::decay_t<decltype(val)>;
|
||||
len = sizeof(ClearType);
|
||||
*reinterpret_cast<ClearType*>(dest) = val;
|
||||
}
|
||||
position += len;
|
||||
};
|
||||
|
||||
store_value(type);
|
||||
store_value(arg_size);
|
||||
|
||||
(store_value(values), ...);
|
||||
}
|
||||
|
||||
private:
|
||||
friend class cache_manager;
|
||||
buffer_storage(pid_t _pid);
|
||||
void shutdown();
|
||||
bool is_running() const;
|
||||
void fragment_memory();
|
||||
uint8_t* reserve_memory_space(size_t len);
|
||||
|
||||
template <typename... Types>
|
||||
struct typelist
|
||||
{
|
||||
template <typename T>
|
||||
constexpr static bool is_supported =
|
||||
(std::is_same_v<std::decay_t<T>, Types> || ...);
|
||||
};
|
||||
|
||||
using supported_types = typelist<const char*, char*, uint64_t, int32_t, uint32_t>;
|
||||
|
||||
template <typename T>
|
||||
static constexpr bool is_string_literal_v =
|
||||
std::is_same_v<std::decay_t<T>, const char*> ||
|
||||
std::is_same_v<std::decay_t<T>, char*>;
|
||||
|
||||
template <typename T>
|
||||
constexpr size_t get_size_impl(T&& val)
|
||||
{
|
||||
if constexpr(is_string_literal_v<T>)
|
||||
{
|
||||
size_t size = 0;
|
||||
while(val[size] != '\0')
|
||||
{
|
||||
size++;
|
||||
}
|
||||
return ++size;
|
||||
}
|
||||
else
|
||||
{
|
||||
return sizeof(T);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... T>
|
||||
constexpr size_t get_size(T&&... val)
|
||||
{
|
||||
auto total_size = 0;
|
||||
((total_size += get_size_impl(val)), ...);
|
||||
return total_size;
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex m_mutex;
|
||||
std::condition_variable m_exit_condition;
|
||||
bool m_exit_finished{ false };
|
||||
bool m_running{ true };
|
||||
std::condition_variable m_shutdown_condition;
|
||||
|
||||
std::unique_ptr<PTL::ThreadPool>m_thread_pool;
|
||||
std::unique_ptr<PTL::TaskGroup<void>> m_task_group;
|
||||
size_t m_head{ 0 };
|
||||
size_t m_tail{ 0 };
|
||||
std::unique_ptr<buffer_array_t> m_buffer{ std::make_unique<buffer_array_t>() };
|
||||
pid_t m_created_process;
|
||||
};
|
||||
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,79 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "cache_manager.hpp"
|
||||
#include "core/config.hpp"
|
||||
#include "core/trace_cache/storage_parser.hpp"
|
||||
#include "debug.hpp"
|
||||
#include "trace_cache/rocpd_post_processing.hpp"
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
cache_manager&
|
||||
cache_manager::get_instance()
|
||||
{
|
||||
static cache_manager instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
cache_manager::cache_manager()
|
||||
: m_postprocessing{ m_metadata }
|
||||
{
|
||||
m_postprocessing.register_parser_callback(m_parser);
|
||||
}
|
||||
|
||||
void
|
||||
cache_manager::post_process()
|
||||
{
|
||||
if(m_storage.is_running())
|
||||
{
|
||||
ROCPROFSYS_WARNING(2, "Postprocessing called without previously shutting down "
|
||||
"cache storage. Calling shutdown explicitly..\n");
|
||||
shutdown();
|
||||
}
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
ROCPROFSYS_PRINT(
|
||||
"Generating rocpd with collected data. This may take a while..\n");
|
||||
}
|
||||
post_process_metadata();
|
||||
m_parser.consume_storage();
|
||||
}
|
||||
|
||||
void
|
||||
cache_manager::post_process_metadata()
|
||||
{
|
||||
m_postprocessing.post_process_metadata();
|
||||
}
|
||||
|
||||
void
|
||||
cache_manager::shutdown()
|
||||
{
|
||||
m_storage.shutdown();
|
||||
}
|
||||
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,67 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "buffer_storage.hpp"
|
||||
#include "core/trace_cache/rocpd_post_processing.hpp"
|
||||
#include "metadata_registry.hpp"
|
||||
#include "storage_parser.hpp"
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
class cache_manager
|
||||
{
|
||||
public:
|
||||
static cache_manager& get_instance();
|
||||
buffer_storage& get_buffer_storage() { return m_storage; }
|
||||
metadata_registry& get_metadata_regsitry() { return m_metadata; }
|
||||
void shutdown();
|
||||
void post_process();
|
||||
|
||||
private:
|
||||
void post_process_metadata();
|
||||
cache_manager();
|
||||
|
||||
buffer_storage m_storage{ getpid() };
|
||||
metadata_registry m_metadata;
|
||||
storage_parser m_parser{ getpid() };
|
||||
rocpd_post_processing m_postprocessing;
|
||||
};
|
||||
|
||||
inline metadata_registry&
|
||||
get_metadata_registry()
|
||||
{
|
||||
return cache_manager::get_instance().get_metadata_regsitry();
|
||||
}
|
||||
|
||||
inline buffer_storage&
|
||||
get_buffer_storage()
|
||||
{
|
||||
return cache_manager::get_instance().get_buffer_storage();
|
||||
}
|
||||
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,45 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include "sample_type.hpp"
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <timemory/units.hpp>
|
||||
#include <unistd.h>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
constexpr size_t buffer_size = 100 * tim::units::megabyte;
|
||||
constexpr size_t flush_threshold = 80 * tim::units::megabyte;
|
||||
const auto filename = "/tmp/buffered_storage_" + std::to_string(getpid()) + ".bin";
|
||||
|
||||
constexpr size_t minimal_fragmented_memory_size = sizeof(entry_type) + sizeof(size_t);
|
||||
using buffer_array_t = std::array<uint8_t, buffer_size>;
|
||||
|
||||
constexpr auto ABSOLUTE = "ABS";
|
||||
constexpr auto PERCENTAGE = "%";
|
||||
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
+296
@@ -0,0 +1,296 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "metadata_registry.hpp"
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename ReturnType, typename DataType, typename Filter>
|
||||
std::optional<ReturnType>
|
||||
get_type_info(const DataType& data, const Filter& filter)
|
||||
{
|
||||
std::optional<ReturnType> result = std::nullopt;
|
||||
data.rlock([&filter, &result](const auto& _data) {
|
||||
auto it = std::find_if(_data.begin(), _data.end(), filter);
|
||||
result = it == _data.end() ? std::nullopt : std::optional<ReturnType>(*it);
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
auto
|
||||
assign_set_to_vector(T& result)
|
||||
{
|
||||
return [&result](const auto& _data) { result.assign(_data.cbegin(), _data.cend()); };
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void
|
||||
metadata_registry::set_process(const info::process& process)
|
||||
{
|
||||
m_process.wlock([&process](auto& _process) { _process = process; });
|
||||
}
|
||||
|
||||
void
|
||||
metadata_registry::add_pmc_info(const info::pmc& pmc_info)
|
||||
{
|
||||
m_pmc_infos.wlock([&pmc_info](auto& _data) {
|
||||
if(_data.count(pmc_info) > 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
_data.emplace(pmc_info);
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
metadata_registry::add_thread_info(const info::thread& thread_info)
|
||||
{
|
||||
m_threads.wlock([&thread_info](auto& _data) {
|
||||
if(_data.count(thread_info) > 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
_data.emplace(thread_info);
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
metadata_registry::add_track(const info::track& track_info)
|
||||
{
|
||||
m_tracks.wlock([&track_info](auto& _data) {
|
||||
if(_data.count(track_info) > 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
_data.emplace(track_info);
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
metadata_registry::add_queue(const uint64_t& queue_handle)
|
||||
{
|
||||
m_queues.wlock([&queue_handle](auto& _data) {
|
||||
if(_data.count(queue_handle) > 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
_data.emplace(queue_handle);
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
metadata_registry::add_stream(const uint64_t& stream_handle)
|
||||
{
|
||||
m_streams.wlock([&stream_handle](auto& _data) {
|
||||
if(_data.count(stream_handle) > 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
_data.emplace(stream_handle);
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
metadata_registry::add_string(const std::string_view& string_value)
|
||||
{
|
||||
m_strings.wlock([&string_value](auto& _data) {
|
||||
if(_data.count(string_value) > 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
_data.emplace(string_value);
|
||||
});
|
||||
}
|
||||
|
||||
info::process
|
||||
metadata_registry::get_process_info() const
|
||||
{
|
||||
info::process result;
|
||||
m_process.rlock([&result](const auto& _process) { result = _process; });
|
||||
return result;
|
||||
}
|
||||
|
||||
std::optional<info::pmc>
|
||||
metadata_registry::get_pmc_info(const std::string_view& unique_name) const
|
||||
{
|
||||
return get_type_info<info::pmc>(m_pmc_infos, [&unique_name](const info::pmc& val) {
|
||||
return val.name == unique_name;
|
||||
});
|
||||
}
|
||||
|
||||
std::optional<info::thread>
|
||||
metadata_registry::get_thread_info(const uint32_t& thread_id) const
|
||||
{
|
||||
return get_type_info<info::thread>(m_threads, [&thread_id](const info::thread& val) {
|
||||
return val.thread_id == thread_id;
|
||||
});
|
||||
}
|
||||
|
||||
std::optional<info::track>
|
||||
metadata_registry::get_track_info(const std::string_view& track_name) const
|
||||
{
|
||||
return get_type_info<info::track>(m_tracks, [&track_name](const info::track& val) {
|
||||
return val.track_name == track_name;
|
||||
});
|
||||
}
|
||||
|
||||
std::vector<info::pmc>
|
||||
metadata_registry::get_pmc_info_list() const
|
||||
{
|
||||
std::vector<info::pmc> result;
|
||||
m_pmc_infos.rlock(assign_set_to_vector(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<info::thread>
|
||||
metadata_registry::get_thread_info_list() const
|
||||
{
|
||||
std::vector<info::thread> result;
|
||||
m_threads.rlock(assign_set_to_vector(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<info::track>
|
||||
metadata_registry::get_track_info_list() const
|
||||
{
|
||||
std::vector<info::track> result;
|
||||
m_tracks.rlock(assign_set_to_vector(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<uint64_t>
|
||||
metadata_registry::get_queue_list() const
|
||||
{
|
||||
std::vector<uint64_t> result;
|
||||
m_queues.rlock(assign_set_to_vector(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<uint64_t>
|
||||
metadata_registry::get_stream_list() const
|
||||
{
|
||||
std::vector<uint64_t> result;
|
||||
m_streams.rlock(assign_set_to_vector(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<std::string_view>
|
||||
metadata_registry::get_string_list() const
|
||||
{
|
||||
std::vector<std::string_view> result;
|
||||
m_strings.rlock(assign_set_to_vector(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM
|
||||
|
||||
void
|
||||
metadata_registry::add_code_object(
|
||||
const rocprofiler_callback_tracing_code_object_load_data_t& code_object)
|
||||
{
|
||||
m_code_objects.wlock([&code_object](auto& _data) {
|
||||
if(_data.count(code_object) > 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
_data.emplace(code_object);
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
metadata_registry::add_kernel_symbol(
|
||||
const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t&
|
||||
kernel_symbol)
|
||||
{
|
||||
m_kernel_symbols.wlock([&kernel_symbol](auto& _data) {
|
||||
if(_data.count(kernel_symbol) > 0)
|
||||
{
|
||||
return;
|
||||
}
|
||||
_data.emplace(kernel_symbol);
|
||||
});
|
||||
}
|
||||
|
||||
std::optional<rocprofiler_callback_tracing_code_object_load_data_t>
|
||||
metadata_registry::get_code_object(uint64_t code_object_id) const
|
||||
{
|
||||
return get_type_info<rocprofiler_callback_tracing_code_object_load_data_t>(
|
||||
m_code_objects,
|
||||
[&code_object_id](
|
||||
const rocprofiler_callback_tracing_code_object_load_data_t& val) {
|
||||
return val.code_object_id == code_object_id;
|
||||
});
|
||||
}
|
||||
|
||||
std::optional<rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t>
|
||||
metadata_registry::get_kernel_symbol(uint64_t kernel_id) const
|
||||
{
|
||||
return get_type_info<
|
||||
rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t>(
|
||||
m_kernel_symbols,
|
||||
[&kernel_id](
|
||||
const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t&
|
||||
val) { return val.kernel_id == kernel_id; });
|
||||
}
|
||||
|
||||
std::vector<rocprofiler_callback_tracing_code_object_load_data_t>
|
||||
metadata_registry::get_code_object_list() const
|
||||
{
|
||||
std::vector<rocprofiler_callback_tracing_code_object_load_data_t> result;
|
||||
m_code_objects.rlock(assign_set_to_vector(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t>
|
||||
metadata_registry::get_kernel_symbol_list() const
|
||||
{
|
||||
std::vector<rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t>
|
||||
result;
|
||||
m_kernel_symbols.rlock(assign_set_to_vector(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
rocprofiler::sdk::buffer_name_info_t<const char*>
|
||||
metadata_registry::get_buffer_name_info() const
|
||||
{
|
||||
return m_buffered_tracing_info;
|
||||
}
|
||||
|
||||
rocprofiler::sdk::callback_name_info_t<const char*>
|
||||
metadata_registry::get_callback_tracing_info() const
|
||||
{
|
||||
return m_callback_tracing_info;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
+218
@@ -0,0 +1,218 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/synchronized.hpp"
|
||||
#include "core/agent.hpp"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <optional>
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
# include <rocprofiler-sdk/callback_tracing.h>
|
||||
# include <rocprofiler-sdk/cxx/name_info.hpp>
|
||||
#endif
|
||||
#include <set>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <sys/types.h>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
namespace info
|
||||
{
|
||||
struct process
|
||||
{
|
||||
pid_t pid; // < Unique
|
||||
pid_t ppid;
|
||||
std::string command;
|
||||
};
|
||||
struct pmc
|
||||
{
|
||||
agent_type type;
|
||||
size_t agent_type_index;
|
||||
std::string target_arch;
|
||||
size_t event_code;
|
||||
size_t instance_id;
|
||||
std::string name; // < Unique
|
||||
std::string symbol;
|
||||
std::string description;
|
||||
std::string long_description;
|
||||
std::string component;
|
||||
std::string units;
|
||||
std::string value_type;
|
||||
std::string block;
|
||||
std::string expression;
|
||||
uint32_t is_constant;
|
||||
uint32_t is_derived;
|
||||
std::string extdata;
|
||||
};
|
||||
|
||||
struct pmc_info_hash
|
||||
{
|
||||
std::size_t operator()(const pmc& _pmc) const noexcept
|
||||
{
|
||||
std::size_t h1 = std::hash<size_t>{}(static_cast<size_t>(_pmc.type));
|
||||
std::size_t h2 = std::hash<size_t>{}(_pmc.agent_type_index);
|
||||
std::size_t h3 = std::hash<std::string>{}(_pmc.name);
|
||||
return h1 ^ (h2 << 1) ^ (h3 << 1);
|
||||
}
|
||||
};
|
||||
|
||||
struct pmc_info_equal
|
||||
{
|
||||
bool operator()(const pmc& lhs, const pmc& rhs) const noexcept
|
||||
{
|
||||
return lhs.type == rhs.type && lhs.agent_type_index == rhs.agent_type_index &&
|
||||
lhs.name == rhs.name;
|
||||
}
|
||||
};
|
||||
|
||||
struct thread
|
||||
{
|
||||
int32_t parent_process_id;
|
||||
int32_t process_id;
|
||||
uint64_t thread_id; // < Unique
|
||||
uint32_t start;
|
||||
uint32_t end;
|
||||
std::string extdata;
|
||||
friend bool operator<(const thread& lhs, const thread& rhs)
|
||||
{
|
||||
return lhs.thread_id < rhs.thread_id;
|
||||
}
|
||||
};
|
||||
|
||||
struct track
|
||||
{
|
||||
std::string track_name; // < Unique
|
||||
std::optional<size_t> thread_id;
|
||||
std::string extdata;
|
||||
|
||||
friend bool operator<(const track& lhs, const track& rhs)
|
||||
{
|
||||
return lhs.track_name.compare(rhs.track_name) < 0;
|
||||
}
|
||||
};
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
struct code_object_less
|
||||
{
|
||||
bool operator()(const rocprofiler_callback_tracing_code_object_load_data_t& lhs,
|
||||
const rocprofiler_callback_tracing_code_object_load_data_t& rhs) const
|
||||
{
|
||||
return lhs.code_object_id < rhs.code_object_id;
|
||||
}
|
||||
};
|
||||
|
||||
struct kernel_symbol_less
|
||||
{
|
||||
bool operator()(
|
||||
const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t& lhs,
|
||||
const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t& rhs)
|
||||
const
|
||||
{
|
||||
return lhs.kernel_object < rhs.kernel_object;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
} // namespace info
|
||||
|
||||
class cache_manager;
|
||||
struct metadata_registry
|
||||
{
|
||||
void set_process(const info::process& process);
|
||||
void add_pmc_info(const info::pmc& pmc_info);
|
||||
void add_thread_info(const info::thread& thread_info);
|
||||
void add_track(const info::track& track_info);
|
||||
void add_queue(const uint64_t& queue_handle);
|
||||
void add_stream(const uint64_t& stream_handle);
|
||||
void add_string(const std::string_view& string_value);
|
||||
|
||||
info::process get_process_info() const;
|
||||
std::optional<info::pmc> get_pmc_info(const std::string_view& unique_name) const;
|
||||
std::optional<info::thread> get_thread_info(const uint32_t& thread_id) const;
|
||||
std::optional<info::track> get_track_info(const std::string_view& track_name) const;
|
||||
std::vector<info::pmc> get_pmc_info_list() const;
|
||||
std::vector<info::thread> get_thread_info_list() const;
|
||||
std::vector<info::track> get_track_info_list() const;
|
||||
std::vector<uint64_t> get_queue_list() const;
|
||||
std::vector<uint64_t> get_stream_list() const;
|
||||
std::vector<std::string_view> get_string_list() const;
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
void add_code_object(
|
||||
const rocprofiler_callback_tracing_code_object_load_data_t& code_object);
|
||||
void add_kernel_symbol(
|
||||
const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t&
|
||||
kernel_symbol);
|
||||
std::vector<rocprofiler_callback_tracing_code_object_load_data_t>
|
||||
get_code_object_list() const;
|
||||
std::vector<rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t>
|
||||
get_kernel_symbol_list() const;
|
||||
std::optional<rocprofiler_callback_tracing_code_object_load_data_t> get_code_object(
|
||||
uint64_t code_object_id) const;
|
||||
std::optional<rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t>
|
||||
get_kernel_symbol(uint64_t kernel_id) const;
|
||||
rocprofiler::sdk::buffer_name_info_t<const char*> get_buffer_name_info() const;
|
||||
rocprofiler::sdk::callback_name_info_t<const char*> get_callback_tracing_info() const;
|
||||
#endif
|
||||
|
||||
private:
|
||||
friend class cache_manager;
|
||||
metadata_registry() = default;
|
||||
common::synchronized<info::process> m_process;
|
||||
common::synchronized<
|
||||
std::unordered_set<info::pmc, info::pmc_info_hash, info::pmc_info_equal>>
|
||||
m_pmc_infos;
|
||||
common::synchronized<std::set<info::thread>> m_threads;
|
||||
common::synchronized<std::set<info::track>> m_tracks;
|
||||
|
||||
common::synchronized<std::set<uint64_t>> m_streams;
|
||||
common::synchronized<std::set<uint64_t>> m_queues;
|
||||
common::synchronized<std::unordered_set<std::string_view>> m_strings;
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
common::synchronized<std::set<rocprofiler_callback_tracing_code_object_load_data_t,
|
||||
info::code_object_less>>
|
||||
m_code_objects;
|
||||
common::synchronized<
|
||||
std::set<rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t,
|
||||
info::kernel_symbol_less>>
|
||||
m_kernel_symbols;
|
||||
rocprofiler::sdk::buffer_name_info_t<const char*> m_buffered_tracing_info{
|
||||
rocprofiler::sdk::get_buffer_tracing_names<const char*>()
|
||||
};
|
||||
rocprofiler::sdk::callback_name_info_t<const char*> m_callback_tracing_info{
|
||||
rocprofiler::sdk::get_callback_tracing_names<const char*>()
|
||||
};
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
+574
@@ -0,0 +1,574 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "trace_cache/rocpd_post_processing.hpp"
|
||||
#include "agent_manager.hpp"
|
||||
#include "common.hpp"
|
||||
#include "config.hpp"
|
||||
#include "debug.hpp"
|
||||
#include "library/thread_info.hpp"
|
||||
#include "node_info.hpp"
|
||||
#include "rocpd/data_processor.hpp"
|
||||
#include "trace_cache/metadata_registry.hpp"
|
||||
#include "trace_cache/sample_type.hpp"
|
||||
#include "trace_cache/storage_parser.hpp"
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <timemory/utility/demangle.hpp>
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
# include "library/rocprofiler-sdk/fwd.hpp"
|
||||
# include <rocprofiler-sdk/context.h>
|
||||
# include <rocprofiler-sdk/version.h>
|
||||
#endif
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
namespace
|
||||
{
|
||||
rocpd::data_processor&
|
||||
get_data_processor()
|
||||
{
|
||||
return rocpd::data_processor::get_instance();
|
||||
}
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto
|
||||
get_handle_from_code_object(
|
||||
const rocprofiler_callback_tracing_code_object_load_data_t& code_object)
|
||||
{
|
||||
# if(ROCPROFILER_VERSION >= 600)
|
||||
return code_object.agent_id.handle;
|
||||
# else
|
||||
return code_object.rocp_agent.handle;
|
||||
# endif
|
||||
}
|
||||
#endif
|
||||
} // namespace
|
||||
|
||||
postprocessing_callback
|
||||
rocpd_post_processing::get_kernel_dispatch_callback() const
|
||||
{
|
||||
return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) {
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto _kds = static_cast<const struct kernel_dispatch_sample&>(parsed);
|
||||
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& agent_manager = agent_manager::get_instance();
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto process = m_metadata.get_process_info();
|
||||
auto agent_primary_key =
|
||||
agent_manager.get_agent_by_handle(_kds.agent_id_handle).base_id;
|
||||
|
||||
auto thread_primary_key =
|
||||
data_processor.map_thread_id_to_primary_key(_kds.thread_id);
|
||||
|
||||
auto category_id = data_processor.insert_string(
|
||||
trait::name<category::rocm_kernel_dispatch>::value);
|
||||
|
||||
auto kernel_symbol = m_metadata.get_kernel_symbol(_kds.kernel_id);
|
||||
|
||||
if(!kernel_symbol.has_value())
|
||||
{
|
||||
throw std::runtime_error("Kernel symbol is missing for kernel dispatch");
|
||||
return;
|
||||
}
|
||||
|
||||
auto region_name_primary_key = data_processor.insert_string(
|
||||
tim::demangle(kernel_symbol->kernel_name).c_str());
|
||||
|
||||
auto stack_id = _kds.correlation_id_internal;
|
||||
auto parent_stack_id = _kds.correlation_id_ancestor;
|
||||
auto correlation_id = 0;
|
||||
|
||||
auto event_id = data_processor.insert_event(category_id, stack_id,
|
||||
parent_stack_id, correlation_id);
|
||||
|
||||
data_processor.insert_kernel_dispatch(
|
||||
n_info.id, process.pid, thread_primary_key, agent_primary_key, _kds.kernel_id,
|
||||
_kds.dispatch_id, _kds.queue_id_handle, _kds.stream_handle,
|
||||
_kds.start_timestamp, _kds.end_timestamp, _kds.private_segment_size,
|
||||
_kds.group_segment_size, _kds.workgroup_size_x, _kds.workgroup_size_y,
|
||||
_kds.workgroup_size_z, _kds.grid_size_x, _kds.grid_size_y, _kds.grid_size_z,
|
||||
region_name_primary_key, event_id);
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
postprocessing_callback
|
||||
rocpd_post_processing::get_memory_copy_callback() const
|
||||
{
|
||||
return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) {
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto _mcs = static_cast<const struct memory_copy_sample&>(parsed);
|
||||
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& agent_manager = agent_manager::get_instance();
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto process = m_metadata.get_process_info();
|
||||
|
||||
auto _name = std::string{ m_metadata.get_buffer_name_info().at(
|
||||
static_cast<rocprofiler_buffer_tracing_kind_t>(_mcs.kind),
|
||||
static_cast<rocprofiler_tracing_operation_t>(_mcs.operation)) };
|
||||
auto name_primary_key = data_processor.insert_string(_name.c_str());
|
||||
|
||||
auto category_primary_key =
|
||||
data_processor.insert_string(trait::name<category::rocm_memory_copy>::value);
|
||||
|
||||
auto thread_primary_key =
|
||||
data_processor.map_thread_id_to_primary_key(_mcs.thread_id);
|
||||
|
||||
auto dst_agent_primary_key =
|
||||
agent_manager.get_agent_by_handle(_mcs.dst_agent_id_handle).base_id;
|
||||
auto src_agent_primary_key =
|
||||
agent_manager.get_agent_by_handle(_mcs.src_agent_id_handle).base_id;
|
||||
|
||||
auto stack_id = _mcs.correlation_id_internal;
|
||||
auto parent_stack_id = _mcs.correlation_id_ancestor;
|
||||
auto correlation_id = 0;
|
||||
auto queue_id = 0;
|
||||
|
||||
auto event_primary_key = data_processor.insert_event(
|
||||
category_primary_key, stack_id, parent_stack_id, correlation_id);
|
||||
|
||||
data_processor.insert_memory_copy(
|
||||
n_info.id, process.pid, thread_primary_key, _mcs.start_timestamp,
|
||||
_mcs.end_timestamp, name_primary_key, dst_agent_primary_key,
|
||||
_mcs.dst_address_value, src_agent_primary_key, _mcs.src_address_value,
|
||||
_mcs.bytes, queue_id, _mcs.stream_handle, name_primary_key,
|
||||
event_primary_key);
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
#if(ROCPROFSYS_USE_ROCM > 0 && ROCPROFILER_VERSION >= 600)
|
||||
postprocessing_callback
|
||||
rocpd_post_processing::get_memory_allocate_callback() const
|
||||
{
|
||||
# if ROCPROFSYS_USE_ROCM > 0
|
||||
auto memtype_to_db =
|
||||
[](std::string_view memory_type) -> std::pair<std::string, std::string> {
|
||||
constexpr auto MEMORY_PREFIX = std::string_view{ "MEMORY_ALLOCATION_" };
|
||||
constexpr auto SCRATCH_PREFIX = std::string_view{ "SCRATCH_MEMORY_" };
|
||||
constexpr auto VMEM_PREFIX = std::string_view{ "VMEM_" };
|
||||
constexpr auto ASYNC_PREFIX = std::string_view{ "ASYNC_" };
|
||||
|
||||
std::string _type;
|
||||
std::string _level;
|
||||
if(memory_type.find(MEMORY_PREFIX) == 0)
|
||||
{
|
||||
_type = memory_type.substr(MEMORY_PREFIX.length());
|
||||
if(_type.find(VMEM_PREFIX) == 0)
|
||||
{
|
||||
_type = _type.substr(VMEM_PREFIX.length());
|
||||
_level = "VIRTUAL";
|
||||
}
|
||||
else
|
||||
{
|
||||
_level = "REAL";
|
||||
}
|
||||
}
|
||||
else if(memory_type.find(SCRATCH_PREFIX) == 0)
|
||||
{
|
||||
_type = memory_type.substr(SCRATCH_PREFIX.length());
|
||||
_level = "SCRATCH";
|
||||
if(memory_type.find(ASYNC_PREFIX) == 0)
|
||||
{
|
||||
_type = memory_type.substr(ASYNC_PREFIX.length()); // RECLAIM
|
||||
}
|
||||
}
|
||||
|
||||
if(_type == "ALLOCATE")
|
||||
{
|
||||
_type = "ALLOC";
|
||||
}
|
||||
|
||||
return std::make_pair(_type, _level);
|
||||
};
|
||||
# endif
|
||||
|
||||
return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) {
|
||||
# if ROCPROFSYS_USE_ROCM > 0
|
||||
auto _mas = static_cast<const struct memory_allocate_sample&>(parsed);
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& agent_manager = agent_manager::get_instance();
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto process = m_metadata.get_process_info();
|
||||
auto thread_primary_key =
|
||||
data_processor.map_thread_id_to_primary_key(_mas.thread_id);
|
||||
auto agent_primary_key = std::optional<uint64_t>{};
|
||||
|
||||
const auto invalid_context = ROCPROFILER_CONTEXT_NONE;
|
||||
if(_mas.agent_id_handle != invalid_context.handle)
|
||||
{
|
||||
{
|
||||
agent_primary_key =
|
||||
agent_manager.get_agent_by_handle(_mas.agent_id_handle).base_id;
|
||||
}
|
||||
const auto* _name = m_metadata.get_buffer_name_info().at(
|
||||
static_cast<rocprofiler_buffer_tracing_kind_t>(_mas.kind),
|
||||
static_cast<rocprofiler_tracing_operation_t>(_mas.operation));
|
||||
|
||||
auto [type, level] = memtype_to_db(_name);
|
||||
|
||||
auto stack_id = _mas.correlation_id_internal;
|
||||
auto parent_stack_id = _mas.correlation_id_ancestor;
|
||||
auto correlation_id = 0;
|
||||
auto queue_id = 0;
|
||||
|
||||
auto category_primary_key = data_processor.insert_string(
|
||||
trait::name<category::rocm_memory_allocate>::value);
|
||||
|
||||
auto event_primary_key = data_processor.insert_event(
|
||||
category_primary_key, stack_id, parent_stack_id, correlation_id);
|
||||
|
||||
data_processor.insert_memory_alloc(
|
||||
n_info.id, process.pid, thread_primary_key, agent_primary_key,
|
||||
type.c_str(), level.c_str(), _mas.start_timestamp, _mas.end_timestamp,
|
||||
_mas.address_value, _mas.allocation_size, queue_id, _mas.stream_handle,
|
||||
event_primary_key);
|
||||
# endif
|
||||
};
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
postprocessing_callback
|
||||
rocpd_post_processing::get_region_callback() const
|
||||
{
|
||||
[[maybe_unused]] auto parse_args = []([[maybe_unused]] const std::string& arg_str) {
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
rocprofiler_sdk::function_args_t args;
|
||||
const std::string delimiter = ";;";
|
||||
|
||||
auto split = [](const std::string& str, const std::string& _delimiter) {
|
||||
std::vector<std::string> tokens;
|
||||
size_t start = 0;
|
||||
size_t end = str.find(_delimiter);
|
||||
|
||||
while(end != std::string::npos)
|
||||
{
|
||||
tokens.push_back(str.substr(start, end - start));
|
||||
start = end + _delimiter.length();
|
||||
end = str.find(_delimiter, start);
|
||||
}
|
||||
|
||||
return tokens;
|
||||
};
|
||||
|
||||
auto tokens = split(arg_str, delimiter);
|
||||
|
||||
// Ensure the number of tokens is a multiple of 4
|
||||
if(tokens.size() % 4 != 0)
|
||||
{
|
||||
throw std::invalid_argument("Malformed argument string.");
|
||||
}
|
||||
|
||||
for(auto it = tokens.begin(); it != tokens.end(); it += 4)
|
||||
{
|
||||
rocprofiler_sdk::argument_info arg = { static_cast<uint32_t>(std::stoi(*it)),
|
||||
*(it + 1), *(it + 2), *(it + 3) };
|
||||
args.push_back(arg);
|
||||
}
|
||||
|
||||
return args;
|
||||
#endif
|
||||
};
|
||||
|
||||
return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) {
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto _rs = static_cast<const struct region_sample&>(parsed);
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto process = m_metadata.get_process_info();
|
||||
auto thread_primary_key =
|
||||
data_processor.map_thread_id_to_primary_key(_rs.thread_id);
|
||||
|
||||
auto callback_tracing_info = m_metadata.get_callback_tracing_info();
|
||||
auto _name = std::string{ callback_tracing_info.at(
|
||||
static_cast<rocprofiler_callback_tracing_kind_t>(_rs.kind),
|
||||
static_cast<rocprofiler_tracing_operation_t>(_rs.operation)) };
|
||||
auto name_primary_key = data_processor.insert_string(_name.c_str());
|
||||
|
||||
auto category_primary_key = data_processor.insert_string(_rs.category.c_str());
|
||||
|
||||
size_t stack_id = _rs.correlation_id_internal;
|
||||
size_t parent_stack_id = _rs.correlation_id_ancestor;
|
||||
size_t correlation_id = 0;
|
||||
|
||||
auto event_primary_key =
|
||||
data_processor.insert_event(category_primary_key, stack_id, parent_stack_id,
|
||||
correlation_id, _rs.call_stack.c_str());
|
||||
|
||||
auto args = parse_args(_rs.args_str);
|
||||
for(const auto& arg : args)
|
||||
{
|
||||
data_processor.insert_args(event_primary_key, arg.arg_number,
|
||||
arg.arg_type.c_str(), arg.arg_name.c_str(),
|
||||
arg.arg_value.c_str());
|
||||
}
|
||||
|
||||
data_processor.insert_region(n_info.id, process.pid, thread_primary_key,
|
||||
_rs.start_timestamp, _rs.end_timestamp,
|
||||
name_primary_key, event_primary_key);
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
postprocessing_callback
|
||||
rocpd_post_processing::get_in_time_sample_callback() const
|
||||
{
|
||||
return [&](const storage_parsed_type_base& parsed) {
|
||||
auto _its = static_cast<const struct in_time_sample&>(parsed);
|
||||
auto& data_processor = get_data_processor();
|
||||
auto track_primary_key = data_processor.insert_string(_its.track_name.c_str());
|
||||
|
||||
auto event_id = data_processor.insert_event(
|
||||
track_primary_key, _its.stack_id, _its.parent_stack_id, _its.correlation_id,
|
||||
_its.call_stack.c_str(), _its.line_info.c_str(), _its.event_metadata.c_str());
|
||||
data_processor.insert_sample(_its.track_name.c_str(), _its.timestamp_ns, event_id,
|
||||
"{}");
|
||||
};
|
||||
}
|
||||
postprocessing_callback
|
||||
rocpd_post_processing::get_pmc_event_with_sample_callback() const
|
||||
{
|
||||
return [&](const storage_parsed_type_base& parsed) {
|
||||
auto _pmc = static_cast<const struct pmc_event_with_sample&>(parsed);
|
||||
auto& data_processor = get_data_processor();
|
||||
auto track_primary_key = data_processor.insert_string(_pmc.track_name.c_str());
|
||||
|
||||
auto& agent_manager = agent_manager::get_instance();
|
||||
auto agent_primary_key =
|
||||
agent_manager.get_agent_by_handle(_pmc.agent_handle).base_id;
|
||||
|
||||
auto event_id = data_processor.insert_event(
|
||||
track_primary_key, _pmc.stack_id, _pmc.parent_stack_id, _pmc.correlation_id,
|
||||
_pmc.call_stack.c_str(), _pmc.line_info.c_str(), _pmc.event_metadata.c_str());
|
||||
data_processor.insert_sample(_pmc.track_name.c_str(), _pmc.timestamp_ns, event_id,
|
||||
"{}");
|
||||
|
||||
data_processor.insert_pmc_event(event_id, agent_primary_key,
|
||||
_pmc.pmc_info_name.c_str(), _pmc.value);
|
||||
};
|
||||
}
|
||||
|
||||
rocpd_post_processing::rocpd_post_processing(metadata_registry& md)
|
||||
: m_metadata(md)
|
||||
{}
|
||||
|
||||
void
|
||||
rocpd_post_processing::register_parser_callback([[maybe_unused]] storage_parser& parser)
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
if(!get_use_rocpd())
|
||||
{
|
||||
return;
|
||||
}
|
||||
parser.register_type_callback(entry_type::region, get_region_callback());
|
||||
parser.register_type_callback(entry_type::kernel_dispatch,
|
||||
get_kernel_dispatch_callback());
|
||||
parser.register_type_callback(entry_type::memory_copy, get_memory_copy_callback());
|
||||
# if(ROCPROFILER_VERSION >= 600)
|
||||
parser.register_type_callback(entry_type::memory_alloc,
|
||||
get_memory_allocate_callback());
|
||||
# endif
|
||||
parser.register_type_callback(entry_type::in_time_sample,
|
||||
get_in_time_sample_callback());
|
||||
parser.register_type_callback(entry_type::pmc_event_with_sample,
|
||||
get_pmc_event_with_sample_callback());
|
||||
ROCPROFSYS_DEBUG("Buffer parser callbacks are registered..");
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_post_processing::post_process_metadata()
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
if(!get_use_rocpd())
|
||||
{
|
||||
return;
|
||||
}
|
||||
ROCPROFSYS_DEBUG("Post processing metadata..");
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& agent_mngr = agent_manager::get_instance();
|
||||
auto n_info = node_info::get_instance();
|
||||
|
||||
data_processor.insert_node_info(n_info.id, n_info.hash, n_info.machine_id.c_str(),
|
||||
n_info.system_name.c_str(), n_info.node_name.c_str(),
|
||||
n_info.release.c_str(), n_info.version.c_str(),
|
||||
n_info.machine.c_str(), n_info.domain_name.c_str());
|
||||
|
||||
auto process_info = m_metadata.get_process_info();
|
||||
data_processor.insert_process_info(n_info.id, process_info.ppid, process_info.pid, 0,
|
||||
0, 0, 0, process_info.command.c_str(), "{}");
|
||||
|
||||
const auto& agents = agent_mngr.get_agents();
|
||||
int counter = 0;
|
||||
for(const auto& rocpd_agent : agents)
|
||||
{
|
||||
auto _base_id = rocpd::data_processor::get_instance().insert_agent(
|
||||
n_info.id, process_info.pid,
|
||||
((rocpd_agent->type == agent_type::GPU) ? "GPU" : "CPU"), counter++,
|
||||
rocpd_agent->logical_node_id, rocpd_agent->logical_node_type_id,
|
||||
rocpd_agent->device_id, rocpd_agent->name.c_str(),
|
||||
rocpd_agent->model_name.c_str(), rocpd_agent->vendor_name.c_str(),
|
||||
rocpd_agent->product_name.c_str(), "");
|
||||
rocpd_agent->base_id = _base_id;
|
||||
}
|
||||
auto _string_list = m_metadata.get_string_list();
|
||||
for(auto& _string : _string_list)
|
||||
{
|
||||
data_processor.insert_string(std::string(_string).c_str());
|
||||
}
|
||||
|
||||
auto _thread_info_list = m_metadata.get_thread_info_list();
|
||||
for(auto& t_info : _thread_info_list)
|
||||
{
|
||||
rocpd_insert_thread_id(t_info, n_info, process_info);
|
||||
}
|
||||
|
||||
auto _track_info_list = m_metadata.get_track_info_list();
|
||||
for(auto& track : _track_info_list)
|
||||
{
|
||||
auto thread_id =
|
||||
track.thread_id.has_value()
|
||||
? std::make_optional<size_t>(data_processor.map_thread_id_to_primary_key(
|
||||
track.thread_id.value()))
|
||||
: std::nullopt;
|
||||
data_processor.insert_track(track.track_name.c_str(), n_info.id, process_info.pid,
|
||||
thread_id);
|
||||
}
|
||||
|
||||
auto _code_object_list = m_metadata.get_code_object_list();
|
||||
for(const auto& code_object : _code_object_list)
|
||||
{
|
||||
auto dev_id =
|
||||
agent_mngr.get_agent_by_handle(get_handle_from_code_object(code_object))
|
||||
.base_id;
|
||||
|
||||
const char* strg_type = "UNKNOWN";
|
||||
switch(code_object.storage_type)
|
||||
{
|
||||
case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE: strg_type = "FILE"; break;
|
||||
case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY: strg_type = "MEMORY"; break;
|
||||
default: break;
|
||||
}
|
||||
data_processor.insert_code_object(code_object.code_object_id, n_info.id,
|
||||
process_info.pid, dev_id, code_object.uri,
|
||||
code_object.load_base, code_object.load_size,
|
||||
code_object.load_delta, strg_type);
|
||||
}
|
||||
|
||||
auto _kernel_symbols_list = m_metadata.get_kernel_symbol_list();
|
||||
for(const auto& kernel_symbol : _kernel_symbols_list)
|
||||
{
|
||||
auto kernel_name = tim::demangle(kernel_symbol.kernel_name);
|
||||
data_processor.insert_kernel_symbol(
|
||||
kernel_symbol.kernel_id, n_info.id, process_info.pid,
|
||||
kernel_symbol.code_object_id, kernel_symbol.kernel_name, kernel_name.c_str(),
|
||||
kernel_symbol.kernel_object, kernel_symbol.kernarg_segment_size,
|
||||
kernel_symbol.kernarg_segment_alignment, kernel_symbol.group_segment_size,
|
||||
kernel_symbol.private_segment_size, kernel_symbol.sgpr_count,
|
||||
kernel_symbol.arch_vgpr_count, kernel_symbol.accum_vgpr_count);
|
||||
|
||||
data_processor.insert_string(kernel_name.c_str());
|
||||
}
|
||||
|
||||
auto _queue_list = m_metadata.get_queue_list();
|
||||
for(const auto& queue_handle : _queue_list)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << "Queue " << queue_handle;
|
||||
data_processor.insert_queue_info(queue_handle, n_info.id, process_info.pid,
|
||||
ss.str().c_str());
|
||||
}
|
||||
|
||||
auto _stream_list = m_metadata.get_stream_list();
|
||||
for(const auto& stream_handle : _stream_list)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << "Stream " << stream_handle;
|
||||
data_processor.insert_stream_info(stream_handle, n_info.id, process_info.pid,
|
||||
ss.str().c_str());
|
||||
}
|
||||
|
||||
auto buffer_info_list = m_metadata.get_buffer_name_info();
|
||||
for(const auto& buffer_info : buffer_info_list)
|
||||
{
|
||||
for(const auto& item : buffer_info.items())
|
||||
{
|
||||
data_processor.insert_string(*item.second);
|
||||
}
|
||||
}
|
||||
|
||||
auto callback_info_list = m_metadata.get_callback_tracing_info();
|
||||
for(const auto& cb_info : callback_info_list)
|
||||
{
|
||||
for(const auto& item : cb_info.items())
|
||||
{
|
||||
data_processor.insert_string(*item.second);
|
||||
}
|
||||
}
|
||||
|
||||
auto pmc_info_list = m_metadata.get_pmc_info_list();
|
||||
for(const auto& pmc_info : pmc_info_list)
|
||||
{
|
||||
const auto agent_primary_key =
|
||||
agent_mngr.get_agent_by_type_index(pmc_info.agent_type_index, pmc_info.type)
|
||||
.base_id;
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
n_info.id, process_info.pid, agent_primary_key, pmc_info.target_arch.c_str(),
|
||||
pmc_info.event_code, pmc_info.instance_id, pmc_info.name.c_str(),
|
||||
pmc_info.symbol.c_str(), pmc_info.description.c_str(),
|
||||
pmc_info.long_description.c_str(), pmc_info.component.c_str(),
|
||||
pmc_info.units.c_str(), pmc_info.value_type.c_str(), pmc_info.block.c_str(),
|
||||
pmc_info.expression.c_str(), pmc_info.is_constant, pmc_info.is_derived);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void
|
||||
rocpd_post_processing::rocpd_insert_thread_id(info::thread& t_info,
|
||||
const node_info& n_info,
|
||||
const info::process& process_info) const
|
||||
{
|
||||
const auto& extended_info = thread_info::get(t_info.thread_id, SequentTID);
|
||||
if(extended_info.has_value())
|
||||
{
|
||||
t_info.start = extended_info->get_start();
|
||||
t_info.end = extended_info->get_stop();
|
||||
}
|
||||
|
||||
std::stringstream ss;
|
||||
ss << "Thread " << t_info.thread_id;
|
||||
get_data_processor().insert_thread_info(n_info.id, process_info.ppid,
|
||||
process_info.pid, t_info.thread_id,
|
||||
ss.str().c_str(), t_info.start, t_info.end);
|
||||
}
|
||||
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
+60
@@ -0,0 +1,60 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include "core/node_info.hpp"
|
||||
#include "core/trace_cache/metadata_registry.hpp"
|
||||
#include "core/trace_cache/storage_parser.hpp"
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
class rocpd_post_processing
|
||||
{
|
||||
public:
|
||||
rocpd_post_processing(metadata_registry& metadata);
|
||||
|
||||
void register_parser_callback(storage_parser& parser);
|
||||
void post_process_metadata();
|
||||
|
||||
private:
|
||||
using primary_key = size_t;
|
||||
|
||||
inline void rocpd_insert_thread_id(info::thread& t_info, const node_info& n_info,
|
||||
const info::process& process_info) const;
|
||||
|
||||
postprocessing_callback get_kernel_dispatch_callback() const;
|
||||
postprocessing_callback get_memory_copy_callback() const;
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
postprocessing_callback get_memory_allocate_callback() const;
|
||||
#endif
|
||||
postprocessing_callback get_region_callback() const;
|
||||
postprocessing_callback get_in_time_sample_callback() const;
|
||||
postprocessing_callback get_pmc_event_with_sample_callback() const;
|
||||
|
||||
metadata_registry& m_metadata;
|
||||
};
|
||||
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,198 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include <unistd.h>
|
||||
#include <utility>
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
# include <rocprofiler-sdk/version.h>
|
||||
#endif
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
struct storage_parsed_type_base
|
||||
{};
|
||||
|
||||
struct kernel_dispatch_sample : storage_parsed_type_base
|
||||
{
|
||||
// Timing fields
|
||||
uint64_t start_timestamp;
|
||||
uint64_t end_timestamp;
|
||||
|
||||
// Identification fields
|
||||
uint64_t thread_id;
|
||||
uint64_t agent_id_handle;
|
||||
uint64_t kernel_id;
|
||||
uint64_t dispatch_id;
|
||||
uint64_t queue_id_handle;
|
||||
|
||||
// Correlation fields
|
||||
uint64_t correlation_id_internal;
|
||||
uint64_t correlation_id_ancestor;
|
||||
|
||||
// Dispatch configuration
|
||||
uint32_t private_segment_size;
|
||||
uint32_t group_segment_size;
|
||||
uint32_t workgroup_size_x;
|
||||
uint32_t workgroup_size_y;
|
||||
uint32_t workgroup_size_z;
|
||||
uint32_t grid_size_x;
|
||||
uint32_t grid_size_y;
|
||||
uint32_t grid_size_z;
|
||||
|
||||
// Stream handle
|
||||
size_t stream_handle;
|
||||
};
|
||||
|
||||
struct memory_copy_sample : storage_parsed_type_base
|
||||
{
|
||||
// Timing fields
|
||||
uint64_t start_timestamp;
|
||||
uint64_t end_timestamp;
|
||||
|
||||
// Identification fields
|
||||
uint64_t thread_id;
|
||||
uint64_t dst_agent_id_handle;
|
||||
uint64_t src_agent_id_handle;
|
||||
|
||||
// Operation details
|
||||
int32_t kind;
|
||||
int32_t operation;
|
||||
uint64_t bytes;
|
||||
|
||||
// Correlation fields
|
||||
uint64_t correlation_id_internal;
|
||||
uint64_t correlation_id_ancestor;
|
||||
|
||||
// Address fields (version dependent)
|
||||
uint64_t dst_address_value;
|
||||
uint64_t src_address_value;
|
||||
|
||||
// Stream handle
|
||||
size_t stream_handle;
|
||||
};
|
||||
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
struct memory_allocate_sample : storage_parsed_type_base
|
||||
{
|
||||
// Timing fields
|
||||
uint64_t start_timestamp;
|
||||
uint64_t end_timestamp;
|
||||
|
||||
// Identification fields
|
||||
uint64_t thread_id;
|
||||
uint64_t agent_id_handle;
|
||||
|
||||
// Operation details
|
||||
int32_t kind;
|
||||
int32_t operation;
|
||||
uint64_t allocation_size;
|
||||
|
||||
// Correlation fields
|
||||
uint64_t correlation_id_internal;
|
||||
uint64_t correlation_id_ancestor;
|
||||
|
||||
// Address fields (version dependent)
|
||||
uint64_t address_value;
|
||||
|
||||
// Stream handle
|
||||
size_t stream_handle;
|
||||
};
|
||||
#endif
|
||||
|
||||
struct region_sample : storage_parsed_type_base
|
||||
{
|
||||
region_sample() = default;
|
||||
region_sample(uint64_t _thread_id, int32_t _kind, int32_t _operation,
|
||||
uint64_t _correlation_id_internal, uint64_t _correlation_id_ancestor,
|
||||
uint64_t _start_timestamp, uint64_t _end_timestamp,
|
||||
std::string _call_stack, std::string _args_str, std::string _category)
|
||||
: thread_id(_thread_id)
|
||||
, kind(_kind)
|
||||
, operation(_operation)
|
||||
, correlation_id_internal(_correlation_id_internal)
|
||||
, correlation_id_ancestor(_correlation_id_ancestor)
|
||||
, start_timestamp(_start_timestamp)
|
||||
, end_timestamp(_end_timestamp)
|
||||
, call_stack(std::move(_call_stack))
|
||||
, args_str(std::move(_args_str))
|
||||
, category(std::move(_category))
|
||||
{}
|
||||
|
||||
// Identification fields
|
||||
uint64_t thread_id;
|
||||
int32_t kind;
|
||||
int32_t operation;
|
||||
|
||||
// Correlation fields
|
||||
uint64_t correlation_id_internal;
|
||||
uint64_t correlation_id_ancestor;
|
||||
|
||||
// Timing fields
|
||||
uint64_t start_timestamp;
|
||||
uint64_t end_timestamp;
|
||||
|
||||
// Additional fields
|
||||
std::string call_stack;
|
||||
std::string args_str;
|
||||
std::string category;
|
||||
};
|
||||
|
||||
struct in_time_sample : storage_parsed_type_base
|
||||
{
|
||||
std::string track_name;
|
||||
size_t timestamp_ns;
|
||||
std::string event_metadata;
|
||||
size_t stack_id;
|
||||
size_t parent_stack_id;
|
||||
size_t correlation_id;
|
||||
std::string call_stack;
|
||||
std::string line_info;
|
||||
};
|
||||
|
||||
struct pmc_event_with_sample : in_time_sample
|
||||
{
|
||||
size_t agent_handle;
|
||||
std::string pmc_info_name;
|
||||
size_t value;
|
||||
};
|
||||
|
||||
enum class entry_type : uint32_t
|
||||
{
|
||||
in_time_sample = 0x0000,
|
||||
pmc_event_with_sample = 0x0001,
|
||||
region = 0x0002,
|
||||
kernel_dispatch = 0x0003,
|
||||
memory_copy = 0x0004,
|
||||
#if(ROCPROFSYS_USE_ROCM && ROCPROFILER_VERSION >= 600)
|
||||
memory_alloc = 0x0005,
|
||||
#endif
|
||||
fragmented_space = 0xFFFF
|
||||
};
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,230 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "storage_parser.hpp"
|
||||
#include "debug.hpp"
|
||||
#include "trace_cache/sample_type.hpp"
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
storage_parser::storage_parser(pid_t _pid)
|
||||
: m_pid(_pid)
|
||||
{}
|
||||
|
||||
void
|
||||
storage_parser::register_type_callback(
|
||||
const entry_type& type,
|
||||
const std::function<void(const storage_parsed_type_base&)>& callback)
|
||||
{
|
||||
m_callbacks[type].push_back(callback);
|
||||
}
|
||||
|
||||
void
|
||||
storage_parser::consume_storage()
|
||||
{
|
||||
ROCPROFSYS_DEBUG("Consuming buffered storage with filename: %s", filename.c_str());
|
||||
if(m_pid != getpid())
|
||||
{
|
||||
ROCPROFSYS_DEBUG(
|
||||
"Storage parser is not created in same process as shutting down..");
|
||||
return;
|
||||
}
|
||||
|
||||
std::ifstream ifs(filename, std::ios::binary);
|
||||
if(!ifs)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << "Error opening file for reading: " << filename << "\n";
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
|
||||
bool _parsing_needed = !m_callbacks.empty();
|
||||
|
||||
struct __attribute__((packed)) sample_header
|
||||
{
|
||||
entry_type type;
|
||||
size_t sample_size;
|
||||
};
|
||||
|
||||
sample_header header;
|
||||
|
||||
while(!ifs.eof() && _parsing_needed)
|
||||
{
|
||||
ifs.read(reinterpret_cast<char*>(&header), sizeof(header));
|
||||
|
||||
if(header.sample_size == 0 || ifs.eof())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> sample;
|
||||
sample.reserve(header.sample_size);
|
||||
ifs.read(reinterpret_cast<char*>(sample.data()), header.sample_size);
|
||||
|
||||
if(ifs.bad())
|
||||
{
|
||||
ROCPROFSYS_WARNING(
|
||||
1,
|
||||
"Bad read while consuming buffered storage. Filename: %s. Bytes read: %d",
|
||||
filename.c_str(), static_cast<int>(ifs.tellg()));
|
||||
continue;
|
||||
}
|
||||
|
||||
switch(header.type)
|
||||
{
|
||||
case entry_type::kernel_dispatch:
|
||||
{
|
||||
kernel_dispatch_sample _kernel_dispatch_sample;
|
||||
parse_data(sample.data(), _kernel_dispatch_sample.start_timestamp,
|
||||
_kernel_dispatch_sample.end_timestamp,
|
||||
_kernel_dispatch_sample.thread_id,
|
||||
_kernel_dispatch_sample.agent_id_handle,
|
||||
_kernel_dispatch_sample.kernel_id,
|
||||
_kernel_dispatch_sample.dispatch_id,
|
||||
_kernel_dispatch_sample.queue_id_handle,
|
||||
_kernel_dispatch_sample.correlation_id_internal,
|
||||
_kernel_dispatch_sample.correlation_id_ancestor,
|
||||
_kernel_dispatch_sample.private_segment_size,
|
||||
_kernel_dispatch_sample.group_segment_size,
|
||||
_kernel_dispatch_sample.workgroup_size_x,
|
||||
_kernel_dispatch_sample.workgroup_size_y,
|
||||
_kernel_dispatch_sample.workgroup_size_z,
|
||||
_kernel_dispatch_sample.grid_size_x,
|
||||
_kernel_dispatch_sample.grid_size_y,
|
||||
_kernel_dispatch_sample.grid_size_z,
|
||||
_kernel_dispatch_sample.stream_handle);
|
||||
|
||||
invoke_callbacks(header.type, _kernel_dispatch_sample);
|
||||
break;
|
||||
}
|
||||
case entry_type::memory_copy:
|
||||
{
|
||||
memory_copy_sample _memory_copy_sample;
|
||||
parse_data(
|
||||
sample.data(), _memory_copy_sample.start_timestamp,
|
||||
_memory_copy_sample.end_timestamp, _memory_copy_sample.thread_id,
|
||||
_memory_copy_sample.dst_agent_id_handle,
|
||||
_memory_copy_sample.src_agent_id_handle, _memory_copy_sample.kind,
|
||||
_memory_copy_sample.operation, _memory_copy_sample.bytes,
|
||||
_memory_copy_sample.correlation_id_internal,
|
||||
_memory_copy_sample.correlation_id_ancestor,
|
||||
_memory_copy_sample.dst_address_value,
|
||||
_memory_copy_sample.src_address_value,
|
||||
_memory_copy_sample.stream_handle);
|
||||
invoke_callbacks(header.type, _memory_copy_sample);
|
||||
break;
|
||||
}
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
case entry_type::memory_alloc:
|
||||
{
|
||||
memory_allocate_sample _memory_allocate_sample;
|
||||
parse_data(sample.data(), _memory_allocate_sample.start_timestamp,
|
||||
_memory_allocate_sample.end_timestamp,
|
||||
_memory_allocate_sample.thread_id,
|
||||
_memory_allocate_sample.agent_id_handle,
|
||||
_memory_allocate_sample.kind,
|
||||
_memory_allocate_sample.operation,
|
||||
_memory_allocate_sample.allocation_size,
|
||||
_memory_allocate_sample.correlation_id_internal,
|
||||
_memory_allocate_sample.correlation_id_ancestor,
|
||||
_memory_allocate_sample.address_value,
|
||||
_memory_allocate_sample.stream_handle);
|
||||
|
||||
invoke_callbacks(header.type, _memory_allocate_sample);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case entry_type::region:
|
||||
{
|
||||
region_sample _region_sample;
|
||||
parse_data(sample.data(), _region_sample.thread_id, _region_sample.kind,
|
||||
_region_sample.operation,
|
||||
_region_sample.correlation_id_internal,
|
||||
_region_sample.correlation_id_ancestor,
|
||||
_region_sample.start_timestamp, _region_sample.end_timestamp,
|
||||
_region_sample.call_stack, _region_sample.args_str,
|
||||
_region_sample.category);
|
||||
|
||||
invoke_callbacks(header.type, _region_sample);
|
||||
break;
|
||||
}
|
||||
case entry_type::in_time_sample:
|
||||
{
|
||||
in_time_sample _in_time_sample;
|
||||
parse_data(sample.data(), _in_time_sample.track_name,
|
||||
_in_time_sample.timestamp_ns, _in_time_sample.event_metadata,
|
||||
_in_time_sample.stack_id, _in_time_sample.parent_stack_id,
|
||||
_in_time_sample.correlation_id, _in_time_sample.call_stack,
|
||||
_in_time_sample.line_info);
|
||||
invoke_callbacks(header.type, _in_time_sample);
|
||||
break;
|
||||
}
|
||||
case entry_type::pmc_event_with_sample:
|
||||
{
|
||||
pmc_event_with_sample _pmc_event_with_sample;
|
||||
parse_data(
|
||||
sample.data(), _pmc_event_with_sample.track_name,
|
||||
_pmc_event_with_sample.timestamp_ns,
|
||||
_pmc_event_with_sample.event_metadata,
|
||||
_pmc_event_with_sample.stack_id,
|
||||
_pmc_event_with_sample.parent_stack_id,
|
||||
_pmc_event_with_sample.correlation_id,
|
||||
_pmc_event_with_sample.call_stack, _pmc_event_with_sample.line_info,
|
||||
_pmc_event_with_sample.agent_handle,
|
||||
_pmc_event_with_sample.pmc_info_name, _pmc_event_with_sample.value);
|
||||
invoke_callbacks(header.type, _pmc_event_with_sample);
|
||||
break;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
ifs.close();
|
||||
ROCPROFSYS_DEBUG("File parsing finished. Removing %s from file system",
|
||||
filename.c_str());
|
||||
std::remove(filename.c_str());
|
||||
}
|
||||
|
||||
void
|
||||
storage_parser::invoke_callbacks(entry_type type, const storage_parsed_type_base& parsed)
|
||||
{
|
||||
auto _callback_list = m_callbacks.find(type);
|
||||
if(_callback_list == m_callbacks.end())
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(1, "Callback not found for cache postprocessing");
|
||||
return;
|
||||
}
|
||||
|
||||
for(auto& cb : _callback_list->second)
|
||||
{
|
||||
cb(parsed);
|
||||
}
|
||||
}
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,83 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "buffer_storage.hpp"
|
||||
#include "sample_type.hpp"
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <rocprofiler-systems/categories.h>
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
using postprocessing_callback = std::function<void(const storage_parsed_type_base&)>;
|
||||
class cache_manager;
|
||||
class storage_parser
|
||||
{
|
||||
public:
|
||||
void register_type_callback(const entry_type& type,
|
||||
const postprocessing_callback& callback);
|
||||
|
||||
void consume_storage();
|
||||
|
||||
private:
|
||||
friend class cache_manager;
|
||||
storage_parser(pid_t _pid);
|
||||
template <typename T>
|
||||
static void process_arg(const uint8_t*& data_pos, T& arg)
|
||||
{
|
||||
if constexpr(std::is_same_v<T, std::string>)
|
||||
{
|
||||
arg = std::string((const char*) data_pos);
|
||||
data_pos += arg.size() + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
arg = *reinterpret_cast<const T*>(data_pos);
|
||||
data_pos += sizeof(T);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
static void parse_data(const uint8_t* data_pos, Args&... args)
|
||||
{
|
||||
(process_arg(data_pos, args), ...);
|
||||
}
|
||||
|
||||
private:
|
||||
pid_t m_pid;
|
||||
void invoke_callbacks(entry_type type, const storage_parsed_type_base& parsed);
|
||||
std::map<entry_type, std::vector<postprocessing_callback>> m_callbacks;
|
||||
};
|
||||
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
@@ -147,16 +147,6 @@ reset_rocprofsys_preload()
|
||||
{
|
||||
(void) get_rocprofsys_is_preloaded();
|
||||
(void) get_rocprofsys_preload();
|
||||
auto _modified_preload = std::string{};
|
||||
for(const auto& itr : delimit(_preload_libs, ":"))
|
||||
{
|
||||
if(itr.find("librocprof-sys") != std::string::npos) continue;
|
||||
_modified_preload += common::join("", ":", itr);
|
||||
}
|
||||
if(!_modified_preload.empty() && _modified_preload.find(':') == 0)
|
||||
_modified_preload = _modified_preload.substr(1);
|
||||
|
||||
setenv("LD_PRELOAD", _modified_preload.c_str(), 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1251,9 +1241,9 @@ rocprofsys_preload()
|
||||
|
||||
verify_instrumented_preloaded();
|
||||
|
||||
static bool _once = false;
|
||||
if(_once) return _preload;
|
||||
_once = true;
|
||||
static pid_t _once = 0;
|
||||
if(_once == getpid()) return _preload;
|
||||
_once = getpid();
|
||||
|
||||
if(_preload)
|
||||
{
|
||||
|
||||
@@ -48,6 +48,7 @@ extern "C"
|
||||
ROCPROFSYS_CATEGORY_ROCM_HSA_API,
|
||||
ROCPROFSYS_CATEGORY_ROCM_KERNEL_DISPATCH,
|
||||
ROCPROFSYS_CATEGORY_ROCM_MEMORY_COPY,
|
||||
ROCPROFSYS_CATEGORY_ROCM_MEMORY_ALLOCATE,
|
||||
ROCPROFSYS_CATEGORY_ROCM_SCRATCH_MEMORY,
|
||||
ROCPROFSYS_CATEGORY_ROCM_HIP_STREAM,
|
||||
ROCPROFSYS_CATEGORY_ROCM_PAGE_MIGRATION,
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
#include "core/perfetto_fwd.hpp"
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "core/timemory.hpp"
|
||||
#include "core/trace_cache/cache_manager.hpp"
|
||||
#include "core/utility.hpp"
|
||||
#include "library/causal/data.hpp"
|
||||
#include "library/causal/experiment.hpp"
|
||||
@@ -328,37 +329,17 @@ read_command_line(pid_t _pid)
|
||||
}
|
||||
|
||||
void
|
||||
rocprofsys_preinit_rocpd()
|
||||
rocprofsys_preinit_cache()
|
||||
{
|
||||
auto& _data_processor = rocpd::data_processor::get_instance();
|
||||
const auto& _n_info = node_info::get_instance();
|
||||
auto _cmd_line = read_command_line(getpid());
|
||||
auto& _agent_manager = agent_manager::get_instance();
|
||||
auto _cmd_line = read_command_line(getpid());
|
||||
|
||||
if(_cmd_line.empty())
|
||||
{
|
||||
_cmd_line.emplace_back("rocprofiler-systems");
|
||||
}
|
||||
|
||||
_data_processor.insert_node_info(
|
||||
_n_info.id, _n_info.hash, _n_info.machine_id.c_str(), _n_info.system_name.c_str(),
|
||||
_n_info.node_name.c_str(), _n_info.release.c_str(), _n_info.version.c_str(),
|
||||
_n_info.machine.c_str(), _n_info.domain_name.c_str());
|
||||
_data_processor.insert_process_info(_n_info.id, getppid(), getpid(), 0, 0, 0, 0,
|
||||
_cmd_line[0].c_str(), "{}");
|
||||
|
||||
const auto& agents = _agent_manager.get_agents();
|
||||
for(const auto& rocpd_agent : agents)
|
||||
{
|
||||
auto _base_id = rocpd::data_processor::get_instance().insert_agent(
|
||||
_n_info.id, getpid(),
|
||||
((rocpd_agent->type == agent_type::GPU) ? "GPU" : "CPU"),
|
||||
rocpd_agent->node_id, rocpd_agent->logical_node_id,
|
||||
rocpd_agent->logical_node_type_id, rocpd_agent->id, rocpd_agent->name.c_str(),
|
||||
rocpd_agent->model_name.c_str(), rocpd_agent->vendor_name.c_str(),
|
||||
rocpd_agent->product_name.c_str(), "");
|
||||
rocpd_agent->base_id = _base_id;
|
||||
}
|
||||
trace_cache::get_metadata_registry().set_process(
|
||||
{ getpid(), getppid(), _cmd_line.at(0) });
|
||||
}
|
||||
|
||||
void
|
||||
@@ -534,7 +515,7 @@ rocprofsys_init_tooling_hidden(void)
|
||||
#if !(ROCPROFSYS_USE_ROCM > 0)
|
||||
rocprofsys_preinit_cpu_agents();
|
||||
#endif
|
||||
if(get_use_rocpd()) rocprofsys_preinit_rocpd();
|
||||
rocprofsys_preinit_cache();
|
||||
|
||||
if(get_use_process_sampling())
|
||||
{
|
||||
@@ -776,10 +757,16 @@ rocprofsys_finalize_hidden(void)
|
||||
rocprofiler_sdk::shutdown();
|
||||
}
|
||||
#endif
|
||||
auto& _manager = rocprofsys::trace_cache::cache_manager::get_instance();
|
||||
_manager.shutdown();
|
||||
_manager.post_process();
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd::data_processor::get_instance().flush();
|
||||
}
|
||||
#endif
|
||||
set_state(State::Finalized);
|
||||
std::quick_exit(EXIT_SUCCESS);
|
||||
return;
|
||||
@@ -880,6 +867,12 @@ rocprofsys_finalize_hidden(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
{
|
||||
auto& _manager = rocprofsys::trace_cache::cache_manager::get_instance();
|
||||
_manager.shutdown();
|
||||
_manager.post_process();
|
||||
}
|
||||
|
||||
ROCPROFSYS_DEBUG_F("Stopping and destroying instrumentation bundles...\n");
|
||||
for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i)
|
||||
{
|
||||
@@ -1070,10 +1063,12 @@ rocprofsys_finalize_hidden(void)
|
||||
[](int) {});
|
||||
|
||||
common::destroy_static_objects();
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd::data_processor::get_instance().flush();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
//======================================================================================//
|
||||
|
||||
@@ -27,6 +27,8 @@
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "core/agent.hpp"
|
||||
#include "core/trace_cache/cache_manager.hpp"
|
||||
#include "core/trace_cache/cache_utility.hpp"
|
||||
#if defined(NDEBUG)
|
||||
# undef NDEBUG
|
||||
#endif
|
||||
@@ -41,6 +43,7 @@
|
||||
#include "core/perfetto.hpp"
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "core/state.hpp"
|
||||
#include "core/trace_cache/metadata_registry.hpp"
|
||||
#include "library/amd_smi.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
#include "library/thread_info.hpp"
|
||||
@@ -85,34 +88,31 @@ get_data_processor()
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_category()
|
||||
metadata_initialize_category()
|
||||
{
|
||||
get_data_processor().insert_category(ROCPROFSYS_CATEGORY_AMD_SMI,
|
||||
trait::name<category::amd_smi>::value);
|
||||
trace_cache::get_metadata_registry().add_string(
|
||||
trait::name<category::amd_smi>::value);
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_smi_tracks()
|
||||
metadata_initialize_smi_tracks()
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
const auto thread_id = std::nullopt; // Internal thread ID for amd-smi
|
||||
const auto thread_id = std::nullopt;
|
||||
|
||||
data_processor.insert_track(trait::name<category::amd_smi_mm_busy>::value, n_info.id,
|
||||
getpid(), thread_id);
|
||||
data_processor.insert_track(trait::name<category::amd_smi_power>::value, n_info.id,
|
||||
getpid(), thread_id);
|
||||
data_processor.insert_track(trait::name<category::amd_smi_temp>::value, n_info.id,
|
||||
getpid(), thread_id);
|
||||
data_processor.insert_track(trait::name<category::amd_smi_memory_usage>::value,
|
||||
n_info.id, getpid(), thread_id);
|
||||
trace_cache::get_metadata_registry().add_track(
|
||||
{ trait::name<category::amd_smi_mm_busy>::value, thread_id, "{}" });
|
||||
trace_cache::get_metadata_registry().add_track(
|
||||
{ trait::name<category::amd_smi_power>::value, thread_id, "{}" });
|
||||
trace_cache::get_metadata_registry().add_track(
|
||||
{ trait::name<category::amd_smi_temp>::value, thread_id, "{}" });
|
||||
trace_cache::get_metadata_registry().add_track(
|
||||
{ trait::name<category::amd_smi_memory_usage>::value, thread_id, "{}" });
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_smi_pmc(size_t gpu_id)
|
||||
metadata_initialize_smi_pmc(size_t gpu_id)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
// find the proper values for a following definitions
|
||||
// TODO: Find the proper values for a following definitions
|
||||
size_t EVENT_CODE = 0;
|
||||
size_t INSTANCE_ID = 0;
|
||||
const char* LONG_DESCRIPTION = "";
|
||||
@@ -121,34 +121,33 @@ rocpd_initialize_smi_pmc(size_t gpu_id)
|
||||
const char* EXPRESSION = "";
|
||||
const char* CELSIUS_DEGREES = "\u00B0C";
|
||||
auto ni = node_info::get_instance();
|
||||
const auto* TARGET_ARCH = "GPU";
|
||||
const char* TARGET_ARCH = "GPU";
|
||||
|
||||
auto& _agent_manager = agent_manager::get_instance();
|
||||
auto base_id = _agent_manager.get_agent_by_id(gpu_id, agent_type::GPU).base_id;
|
||||
trace_cache::get_metadata_registry().add_pmc_info(
|
||||
{ agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::amd_smi_mm_busy>::value, "Busy",
|
||||
trait::name<category::amd_smi_mm_busy>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, trace_cache::PERCENTAGE, rocprofsys::trace_cache::ABSOLUTE, BLOCK,
|
||||
EXPRESSION, 0, 0, "{}" });
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::amd_smi_mm_busy>::value, "Busy",
|
||||
trait::name<category::amd_smi_mm_busy>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
"%", "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
trace_cache::get_metadata_registry().add_pmc_info(
|
||||
{ agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::amd_smi_temp>::value, "Temp",
|
||||
trait::name<category::amd_smi_temp>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
CELSIUS_DEGREES, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::amd_smi_temp>::value, "Temp",
|
||||
trait::name<category::amd_smi_temp>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
CELSIUS_DEGREES, "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
trace_cache::get_metadata_registry().add_pmc_info(
|
||||
{ agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::amd_smi_power>::value, "Pow",
|
||||
trait::name<category::amd_smi_power>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
"W", rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::amd_smi_power>::value, "Pow",
|
||||
trait::name<category::amd_smi_power>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
"w", "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::amd_smi_memory_usage>::value, "MemUsg",
|
||||
trait::name<category::amd_smi_memory_usage>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, "MB", "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
trace_cache::get_metadata_registry().add_pmc_info(
|
||||
{ agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::amd_smi_memory_usage>::value, "MemUsg",
|
||||
trait::name<category::amd_smi_memory_usage>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, tim::units::mem_repr(tim::units::megabyte),
|
||||
rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
|
||||
}
|
||||
|
||||
void
|
||||
@@ -159,10 +158,14 @@ rocpd_process_smi_pmc_events(const uint32_t device_id, const amd_smi::settings&
|
||||
if(!(settings.busy || settings.temp || settings.power || settings.mem_usage)) return;
|
||||
|
||||
auto& data_processor = get_data_processor();
|
||||
auto event_id = data_processor.insert_event(ROCPROFSYS_CATEGORY_AMD_SMI, 0, 0, 0);
|
||||
|
||||
const auto* _name = trait::name<category::amd_smi>::value;
|
||||
auto name_primary_key = data_processor.insert_string(_name);
|
||||
auto event_id = data_processor.insert_event(name_primary_key, 0, 0, 0);
|
||||
|
||||
auto& _agent_manager = agent_manager::get_instance();
|
||||
auto base_id = _agent_manager.get_agent_by_id(device_id, agent_type::GPU).base_id;
|
||||
auto base_id =
|
||||
_agent_manager.get_agent_by_type_index(device_id, agent_type::GPU).base_id;
|
||||
|
||||
auto insert_event_and_sample = [&](bool enabled, const char* name, double value) {
|
||||
if(!enabled) return;
|
||||
@@ -392,10 +395,12 @@ config()
|
||||
for(auto itr : data::device_list)
|
||||
data::get_initial().at(itr).sample(itr);
|
||||
|
||||
if(get_use_rocpd())
|
||||
metadata_initialize_category();
|
||||
metadata_initialize_smi_tracks();
|
||||
|
||||
for(const auto& _dev_id : data::device_list)
|
||||
{
|
||||
rocpd_initialize_category();
|
||||
rocpd_initialize_smi_tracks();
|
||||
metadata_initialize_smi_pmc(_dev_id);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -483,11 +488,6 @@ data::post_process(uint32_t _dev_id)
|
||||
auto use_perfetto = get_use_perfetto();
|
||||
auto use_rocpd = get_use_rocpd();
|
||||
|
||||
if(use_rocpd)
|
||||
{
|
||||
rocpd_initialize_smi_pmc(_dev_id);
|
||||
}
|
||||
|
||||
for(auto& itr : _amd_smi)
|
||||
{
|
||||
using counter_track = perfetto_counter_track<data>;
|
||||
@@ -788,6 +788,7 @@ setup()
|
||||
|
||||
is_initialized() = true;
|
||||
data::setup();
|
||||
|
||||
} catch(std::runtime_error& _e)
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(0, "Exception thrown when initializing amd-smi: %s\n",
|
||||
|
||||
+139
-141
@@ -23,6 +23,7 @@
|
||||
#include "library/components/backtrace_metrics.hpp"
|
||||
#include "core/agent.hpp"
|
||||
#include "core/agent_manager.hpp"
|
||||
#include "core/common.hpp"
|
||||
#include "core/components/fwd.hpp"
|
||||
#include "core/config.hpp"
|
||||
#include "core/debug.hpp"
|
||||
@@ -167,7 +168,141 @@ get_enabled(tim::type_list<Tp...>)
|
||||
(_v.set(_n++, trait::runtime_enabled<Tp>::get()), ...);
|
||||
return _v;
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_init_categories()
|
||||
{
|
||||
static bool _is_initialized = false;
|
||||
if(_is_initialized) return;
|
||||
|
||||
auto& data_processor = get_data_processor();
|
||||
|
||||
data_processor.insert_string(trait::name<category::thread_cpu_time>::value);
|
||||
data_processor.insert_string(trait::name<category::thread_peak_memory>::value);
|
||||
data_processor.insert_string(trait::name<category::thread_context_switch>::value);
|
||||
data_processor.insert_string(trait::name<category::thread_page_fault>::value);
|
||||
data_processor.insert_string(trait::name<category::thread_hardware_counter>::value);
|
||||
|
||||
_is_initialized = true;
|
||||
}
|
||||
|
||||
template <typename Category>
|
||||
void
|
||||
rocpd_init_tracks(int64_t _tid)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
const auto& t_info = thread_info::get(_tid, SequentTID);
|
||||
auto _tid_name = JOIN("", '[', _tid, ']');
|
||||
|
||||
auto thread_idx = data_processor.insert_thread_info(
|
||||
n_info.id, getppid(), getpid(), t_info->index_data->system_value,
|
||||
JOIN(" ", "Thread", _tid).c_str(), t_info->get_start(), t_info->get_stop(), "{}");
|
||||
|
||||
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
|
||||
{
|
||||
// Initialize hw_counter_tracks and create one track for each hardware counter
|
||||
auto _hw_cnt_labels = *get_papi_labels(_tid);
|
||||
for(auto& itr : _hw_cnt_labels)
|
||||
{
|
||||
std::string _desc = tim::papi::get_event_info(itr).short_descr;
|
||||
if(_desc.empty()) _desc = itr;
|
||||
ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str());
|
||||
|
||||
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
|
||||
data_processor.insert_track(track_name.c_str(), n_info.id, getpid(),
|
||||
thread_idx, "{}");
|
||||
}
|
||||
}
|
||||
else
|
||||
data_processor.insert_track(
|
||||
JOIN('_', trait::name<Category>::value, _tid_name).c_str(), n_info.id,
|
||||
getpid(), thread_idx, "{}");
|
||||
}
|
||||
|
||||
template <typename Category>
|
||||
void
|
||||
rocpd_initialize_backtrace_metrics_pmc(size_t dev_id, const char* units, int64_t _tid)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto _tid_name = JOIN("", '[', _tid, ']');
|
||||
|
||||
size_t EVENT_CODE = 0;
|
||||
size_t INSTANCE_ID = 0;
|
||||
const char* LONG_DESCRIPTION = "";
|
||||
const char* COMPONENT = "";
|
||||
const char* BLOCK = "";
|
||||
const char* EXPRESSION = "";
|
||||
auto ni = node_info::get_instance();
|
||||
const auto TARGET_ARCH = "CPU";
|
||||
|
||||
auto& agent_mngr = agent_manager::get_instance();
|
||||
auto base_id = agent_mngr.get_agent_by_id(dev_id, agent_type::CPU).base_id;
|
||||
|
||||
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
|
||||
{
|
||||
auto _hw_cnt_labels = *get_papi_labels(_tid);
|
||||
for(auto& itr : _hw_cnt_labels)
|
||||
{
|
||||
std::string _desc = tim::papi::get_event_info(itr).short_descr;
|
||||
if(_desc.empty()) _desc = itr;
|
||||
ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str());
|
||||
|
||||
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
track_name.c_str(), trait::name<Category>::value,
|
||||
trait::name<Category>::description, LONG_DESCRIPTION, COMPONENT, units,
|
||||
"ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
JOIN("_", trait::name<Category>::value, _tid_name).c_str(),
|
||||
trait::name<Category>::value, trait::name<Category>::description,
|
||||
LONG_DESCRIPTION, COMPONENT, units, "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
}
|
||||
|
||||
template <typename Category, typename Value>
|
||||
void
|
||||
rocpd_process_backtrace_metrics_events(const uint32_t device_id, uint64_t timestamp,
|
||||
Value value, int64_t _tid)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto _tid_name = JOIN("", '[', _tid, ']');
|
||||
|
||||
auto string_primary_key = data_processor.insert_string(trait::name<Category>::value);
|
||||
auto event_id = data_processor.insert_event(string_primary_key, 0, 0, 0);
|
||||
auto& agent_mngr = agent_manager::get_instance();
|
||||
auto base_id = agent_mngr.get_agent_by_id(device_id, agent_type::CPU).base_id;
|
||||
|
||||
auto insert_event_and_sample = [&](const char* _name, double _value) {
|
||||
data_processor.insert_pmc_event(event_id, base_id, _name, _value);
|
||||
data_processor.insert_sample(_name, timestamp, event_id);
|
||||
};
|
||||
|
||||
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
|
||||
{
|
||||
auto _hw_cnt_labels = *get_papi_labels(_tid);
|
||||
const auto& hw_counters =
|
||||
static_cast<backtrace_metrics::hw_counter_data_t>(value);
|
||||
for(size_t i = 0; i < _hw_cnt_labels.size() && i < hw_counters.size(); ++i)
|
||||
{
|
||||
std::string _desc = tim::papi::get_event_info(_hw_cnt_labels[i]).short_descr;
|
||||
if(_desc.empty()) _desc = _hw_cnt_labels[i];
|
||||
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
|
||||
|
||||
insert_event_and_sample(track_name.c_str(), hw_counters.at(i));
|
||||
}
|
||||
}
|
||||
else
|
||||
insert_event_and_sample(
|
||||
JOIN("_", trait::name<Category>::value, _tid_name).c_str(), value);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
void
|
||||
backtrace_metrics::sample(int)
|
||||
{
|
||||
@@ -337,146 +472,6 @@ backtrace_metrics::fini_perfetto(int64_t _tid, valid_array_t _valid)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_init_categories()
|
||||
{
|
||||
static bool _is_initialized = false;
|
||||
if(_is_initialized) return;
|
||||
|
||||
get_data_processor().insert_category(
|
||||
category_enum_id<category::thread_cpu_time>::value,
|
||||
trait::name<category::thread_cpu_time>::value);
|
||||
get_data_processor().insert_category(
|
||||
category_enum_id<category::thread_peak_memory>::value,
|
||||
trait::name<category::thread_peak_memory>::value);
|
||||
get_data_processor().insert_category(
|
||||
category_enum_id<category::thread_context_switch>::value,
|
||||
trait::name<category::thread_context_switch>::value);
|
||||
get_data_processor().insert_category(
|
||||
category_enum_id<category::thread_page_fault>::value,
|
||||
trait::name<category::thread_page_fault>::value);
|
||||
get_data_processor().insert_category(
|
||||
category_enum_id<category::thread_hardware_counter>::value,
|
||||
trait::name<category::thread_hardware_counter>::value);
|
||||
|
||||
_is_initialized = true;
|
||||
}
|
||||
|
||||
template <typename Category>
|
||||
void
|
||||
rocpd_init_tracks(int64_t _tid)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
const auto& t_info = thread_info::get(_tid, SequentTID);
|
||||
auto _tid_name = JOIN("", '[', _tid, ']');
|
||||
|
||||
auto thread_idx = data_processor.insert_thread_info(
|
||||
n_info.id, getppid(), getpid(), t_info->index_data->system_value,
|
||||
JOIN(" ", "Thread", _tid).c_str(), t_info->get_start(), t_info->get_stop(), "{}");
|
||||
|
||||
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
|
||||
{
|
||||
// Initialize hw_counter_tracks and create one track for each hardware counter
|
||||
auto _hw_cnt_labels = *get_papi_labels(_tid);
|
||||
for(auto& itr : _hw_cnt_labels)
|
||||
{
|
||||
std::string _desc = tim::papi::get_event_info(itr).short_descr;
|
||||
if(_desc.empty()) _desc = itr;
|
||||
ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str());
|
||||
|
||||
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
|
||||
data_processor.insert_track(track_name.c_str(), n_info.id, getpid(),
|
||||
thread_idx, "{}");
|
||||
}
|
||||
}
|
||||
else
|
||||
data_processor.insert_track(
|
||||
JOIN('_', trait::name<Category>::value, _tid_name).c_str(), n_info.id,
|
||||
getpid(), thread_idx, "{}");
|
||||
}
|
||||
|
||||
template <typename Category>
|
||||
void
|
||||
rocpd_initialize_backtrace_metrics_pmc(size_t dev_id, const char* units, int64_t _tid)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto _tid_name = JOIN("", '[', _tid, ']');
|
||||
|
||||
size_t EVENT_CODE = 0;
|
||||
size_t INSTANCE_ID = 0;
|
||||
const char* LONG_DESCRIPTION = "";
|
||||
const char* COMPONENT = "";
|
||||
const char* BLOCK = "";
|
||||
const char* EXPRESSION = "";
|
||||
auto ni = node_info::get_instance();
|
||||
const auto* TARGET_ARCH = "CPU";
|
||||
|
||||
auto& _agent_manager = agent_manager::get_instance();
|
||||
auto _base_id = _agent_manager.get_agent_by_id(dev_id, agent_type::CPU).base_id;
|
||||
|
||||
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
|
||||
{
|
||||
auto _hw_cnt_labels = *get_papi_labels(_tid);
|
||||
for(auto& itr : _hw_cnt_labels)
|
||||
{
|
||||
std::string _desc = tim::papi::get_event_info(itr).short_descr;
|
||||
if(_desc.empty()) _desc = itr;
|
||||
ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str());
|
||||
|
||||
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), _base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
track_name.c_str(), trait::name<Category>::value,
|
||||
trait::name<Category>::description, LONG_DESCRIPTION, COMPONENT, units,
|
||||
"ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), _base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
JOIN("_", trait::name<Category>::value, _tid_name).c_str(),
|
||||
trait::name<Category>::value, trait::name<Category>::description,
|
||||
LONG_DESCRIPTION, COMPONENT, units, "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
}
|
||||
|
||||
template <typename Category, typename Value>
|
||||
void
|
||||
rocpd_process_backtrace_metrics_events(const uint32_t device_id, uint64_t timestamp,
|
||||
Value value, int64_t _tid)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto _tid_name = JOIN("", '[', _tid, ']');
|
||||
auto event_id =
|
||||
data_processor.insert_event(category_enum_id<Category>::value, 0, 0, 0);
|
||||
auto& agent_mngr = agent_manager::get_instance();
|
||||
auto base_id = agent_mngr.get_agent_by_id(device_id, agent_type::CPU).base_id;
|
||||
|
||||
auto insert_event_and_sample = [&](const char* name, double _value) {
|
||||
data_processor.insert_pmc_event(event_id, base_id, name, _value);
|
||||
data_processor.insert_sample(name, timestamp, event_id);
|
||||
};
|
||||
|
||||
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
|
||||
{
|
||||
auto _hw_cnt_labels = *get_papi_labels(_tid);
|
||||
const auto& _hw_counters =
|
||||
static_cast<backtrace_metrics::hw_counter_data_t>(value);
|
||||
for(size_t i = 0; i < _hw_cnt_labels.size() && i < _hw_counters.size(); ++i)
|
||||
{
|
||||
std::string _desc = tim::papi::get_event_info(_hw_cnt_labels[i]).short_descr;
|
||||
if(_desc.empty()) _desc = _hw_cnt_labels[i];
|
||||
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
|
||||
|
||||
insert_event_and_sample(track_name.c_str(), _hw_counters.at(i));
|
||||
}
|
||||
}
|
||||
else
|
||||
insert_event_and_sample(
|
||||
JOIN("_", trait::name<Category>::value, _tid_name).c_str(), value);
|
||||
}
|
||||
|
||||
void
|
||||
backtrace_metrics::init_rocpd(int64_t _tid, valid_array_t _valid)
|
||||
{
|
||||
@@ -641,8 +636,10 @@ backtrace_metrics::post_process_perfetto(int64_t _tid, uint64_t _ts) const
|
||||
}
|
||||
|
||||
void
|
||||
backtrace_metrics::post_process_rocpd(int64_t _tid, uint64_t _ts) const
|
||||
backtrace_metrics::post_process_rocpd([[maybe_unused]] int64_t _tid,
|
||||
[[maybe_unused]] uint64_t _ts) const
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto is_category_enabled = [&](const auto& _category) { return (*this)(_category); };
|
||||
|
||||
if(is_category_enabled(category::thread_cpu_time{}))
|
||||
@@ -675,6 +672,7 @@ backtrace_metrics::post_process_rocpd(int64_t _tid, uint64_t _ts) const
|
||||
hw_counter_data_t>(0, _ts, m_hw_counter,
|
||||
_tid);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} // namespace component
|
||||
} // namespace rocprofsys
|
||||
|
||||
+57
-101
@@ -26,7 +26,8 @@
|
||||
#include "core/config.hpp"
|
||||
#include "core/node_info.hpp"
|
||||
#include "core/perfetto.hpp"
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "core/trace_cache/cache_manager.hpp"
|
||||
#include "core/trace_cache/sample_type.hpp"
|
||||
#include "library/tracing.hpp"
|
||||
|
||||
#include <timemory/units.hpp>
|
||||
@@ -76,41 +77,25 @@ write_perfetto_counter_track(uint64_t _val)
|
||||
|
||||
namespace
|
||||
{
|
||||
rocpd::data_processor&
|
||||
get_data_processor()
|
||||
{
|
||||
return rocpd::data_processor::get_instance();
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_comm_data_categories()
|
||||
metadata_initialize_comm_data_categories()
|
||||
{
|
||||
static bool _is_initialized = false;
|
||||
if(_is_initialized) return;
|
||||
|
||||
get_data_processor().insert_category(category_enum_id<category::comm_data>::value,
|
||||
trait::name<category::comm_data>::value);
|
||||
#if defined(ROCPROFSYS_USE_MPI)
|
||||
get_data_processor().insert_category(category_enum_id<category::mpi>::value,
|
||||
trait::name<category::mpi>::value);
|
||||
#endif
|
||||
#if defined(ROCPROFSYS_USE_RCCL)
|
||||
get_data_processor().insert_category(category_enum_id<category::rocm_rccl>::value,
|
||||
trait::name<category::rocm_rccl>::value);
|
||||
#endif
|
||||
trace_cache::get_metadata_registry().add_string(
|
||||
trait::name<category::comm_data>::value);
|
||||
trace_cache::get_metadata_registry().add_string(trait::name<category::mpi>::value);
|
||||
|
||||
_is_initialized = true;
|
||||
}
|
||||
|
||||
template <typename Track>
|
||||
void
|
||||
rocpd_initialize_track()
|
||||
metadata_initialize_track()
|
||||
{
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto thread_id = std::nullopt;
|
||||
auto _init_track = [&](const char* label) {
|
||||
ROCPROFSYS_VERBOSE(3, "INSERT_TRACK label: %s, node ID: %d, Process ID: %d",
|
||||
label, n_info.id, getpid());
|
||||
get_data_processor().insert_track(label, n_info.id, getpid(), thread_id);
|
||||
auto _init_track = [&](const char* label) {
|
||||
trace_cache::get_metadata_registry().add_track({ label, std::nullopt, "{}" });
|
||||
};
|
||||
|
||||
static std::once_flag _once{};
|
||||
@@ -118,9 +103,8 @@ rocpd_initialize_track()
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_comm_data_pmc()
|
||||
metadata_initialize_comm_data_pmc()
|
||||
{
|
||||
[[maybe_unused]] auto& data_processor = get_data_processor();
|
||||
// find the proper values for a following definitions
|
||||
[[maybe_unused]] size_t EVENT_CODE = 0;
|
||||
[[maybe_unused]] size_t INSTANCE_ID = 0;
|
||||
@@ -131,56 +115,28 @@ rocpd_initialize_comm_data_pmc()
|
||||
[[maybe_unused]] constexpr const char* MSG = "bytes";
|
||||
[[maybe_unused]] constexpr const auto* TARGET_ARCH = "CPU";
|
||||
auto ni = node_info::get_instance();
|
||||
constexpr const auto DEVICE_ID = 0; // Assuming CPU device ID is 0
|
||||
|
||||
auto& _agent_manager = agent_manager::get_instance();
|
||||
[[maybe_unused]] auto base_id =
|
||||
_agent_manager.get_agent_by_id(DEVICE_ID, agent_type::CPU).base_id;
|
||||
[[maybe_unused]] constexpr const auto DEVICE_ID = 0; // Assuming CPU device ID is 0
|
||||
|
||||
#if defined(ROCPROFSYS_USE_MPI)
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
comm_data::mpi_send::label, "Tracks MPI Send communication data sizes",
|
||||
trait::name<category::mpi>::description, LONG_DESCRIPTION, COMPONENT, MSG, "ABS",
|
||||
BLOCK, EXPRESSION, 0, 0);
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
comm_data::mpi_recv::label, "Tracks MPI Receive communication data sizes",
|
||||
trait::name<category::mpi>::description, LONG_DESCRIPTION, COMPONENT, MSG, "ABS",
|
||||
BLOCK, EXPRESSION, 0, 0);
|
||||
#endif
|
||||
#if defined(ROCPROFSYS_USE_RCCL)
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, rccl_send::label,
|
||||
"Tracks RCCL Send communication data sizes",
|
||||
trait::name<category::rocm_rccl>::description, LONG_DESCRIPTION, COMPONENT, MSG,
|
||||
"ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, rccl_recv::label,
|
||||
"Tracks RCCL Receive communication data sizes",
|
||||
trait::name<category::rocm_rccl>::description, LONG_DESCRIPTION, COMPONENT, MSG,
|
||||
"ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
trace_cache::get_metadata_registry().add_pmc_info(
|
||||
{ agent_type::CPU, DEVICE_ID, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
comm_data::mpi_send::label, "Tracks MPI communication data sizes",
|
||||
trait::name<category::mpi>::description, LONG_DESCRIPTION, COMPONENT, MSG,
|
||||
rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
|
||||
trace_cache::get_metadata_registry().add_pmc_info(
|
||||
{ agent_type::CPU, DEVICE_ID, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
comm_data::mpi_recv::label, "Tracks MPI communication data sizes",
|
||||
trait::name<category::mpi>::description, LONG_DESCRIPTION, COMPONENT, MSG,
|
||||
rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename Track>
|
||||
void
|
||||
rocpd_process_cpu_usage_events(const uint32_t device_id, int bytes)
|
||||
cache_comm_data_events(const uint32_t device_id, int bytes)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto event_id = data_processor.insert_event(
|
||||
category_enum_id<category::comm_data>::value, 0, 0, 0);
|
||||
|
||||
auto& agents = agent_manager::get_instance();
|
||||
auto agent = agents.get_agent_by_id(device_id, agent_type::CPU);
|
||||
|
||||
auto insert_event_and_sample = [&](const char* name, uint64_t timestamp,
|
||||
double value) {
|
||||
data_processor.insert_pmc_event(event_id, agent.device_id, name, value);
|
||||
data_processor.insert_sample(name, timestamp, event_id);
|
||||
};
|
||||
auto agent = agents.get_agent_by_type_index(device_id, agent_type::CPU);
|
||||
|
||||
static std::mutex _mutex{};
|
||||
static uint64_t value = 0;
|
||||
@@ -190,8 +146,20 @@ rocpd_process_cpu_usage_events(const uint32_t device_id, int bytes)
|
||||
_now = rocprofsys::tracing::now<uint64_t>();
|
||||
bytes = (value += bytes);
|
||||
}
|
||||
const std::string track_name = Track::label;
|
||||
const size_t timestamp_ns = _now;
|
||||
const std::string event_metadata = "{}";
|
||||
const size_t stack_id = 0;
|
||||
const size_t parent_stack_id = 0;
|
||||
const size_t correlation_id = 0;
|
||||
const std::string call_stack = "{}";
|
||||
const std::string line_info = "{}";
|
||||
const size_t agent_handle = agent.handle;
|
||||
|
||||
insert_event_and_sample(Track::label, _now, bytes);
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::pmc_event_with_sample, track_name.c_str(), timestamp_ns,
|
||||
event_metadata.c_str(), stack_id, parent_stack_id, correlation_id,
|
||||
call_stack.c_str(), line_info.c_str(), agent_handle, track_name.c_str(), value);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@@ -199,10 +167,14 @@ rocpd_process_cpu_usage_events(const uint32_t device_id, int bytes)
|
||||
void
|
||||
comm_data::start()
|
||||
{
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_comm_data_categories();
|
||||
rocpd_initialize_comm_data_pmc();
|
||||
metadata_initialize_comm_data_categories();
|
||||
metadata_initialize_comm_data_pmc();
|
||||
|
||||
#if defined(ROCPROFSYS_USE_MPI)
|
||||
metadata_initialize_track<mpi_send>();
|
||||
metadata_initialize_track<mpi_recv>();
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -248,10 +220,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int cou
|
||||
|
||||
write_perfetto_counter_track<mpi_send>(count * _size);
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_track<mpi_send>();
|
||||
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
|
||||
cache_comm_data_events<mpi_send>(0, count * _size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
@@ -276,10 +246,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count,
|
||||
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<mpi_recv>(count * _size);
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_track<mpi_recv>();
|
||||
rocpd_process_cpu_usage_events<mpi_recv>(0, count * _size);
|
||||
cache_comm_data_events<mpi_recv>(0, count * _size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
@@ -304,10 +272,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int cou
|
||||
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<mpi_send>(count * _size);
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_track<mpi_send>();
|
||||
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
|
||||
cache_comm_data_events<mpi_send>(0, count * _size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
@@ -332,10 +298,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count,
|
||||
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<mpi_recv>(count * _size);
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_track<mpi_recv>();
|
||||
rocpd_process_cpu_usage_events<mpi_recv>(0, count * _size);
|
||||
cache_comm_data_events<mpi_recv>(0, count * _size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
@@ -360,10 +324,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count,
|
||||
|
||||
if(get_use_perfetto()) write_perfetto_counter_track<mpi_send>(count * _size);
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_track<mpi_send>();
|
||||
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
|
||||
cache_comm_data_events<mpi_send>(0, count * _size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
@@ -389,12 +351,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, void*,
|
||||
write_perfetto_counter_track<mpi_send>(count * _size);
|
||||
}
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_track<mpi_send>();
|
||||
rocpd_initialize_track<mpi_recv>();
|
||||
rocpd_process_cpu_usage_events<mpi_recv>(0, count * _size);
|
||||
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
|
||||
cache_comm_data_events<mpi_recv>(0, count * _size);
|
||||
cache_comm_data_events<mpi_send>(0, count * _size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory()) add(_data, count * _size);
|
||||
@@ -416,10 +375,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen
|
||||
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
|
||||
}
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_process_cpu_usage_events<mpi_send>(0, sendcount * _send_size);
|
||||
rocpd_process_cpu_usage_events<mpi_recv>(0, recvcount * _send_size);
|
||||
cache_comm_data_events<mpi_send>(0, sendcount * _send_size);
|
||||
cache_comm_data_events<mpi_recv>(0, recvcount * _send_size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
@@ -467,10 +425,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen
|
||||
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
|
||||
}
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_process_cpu_usage_events<mpi_send>(0, sendcount * _send_size);
|
||||
rocpd_process_cpu_usage_events<mpi_recv>(0, recvcount * _send_size);
|
||||
cache_comm_data_events<mpi_send>(0, sendcount * _send_size);
|
||||
cache_comm_data_events<mpi_recv>(0, recvcount * _send_size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
@@ -501,10 +458,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen
|
||||
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
|
||||
}
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_process_cpu_usage_events<mpi_send>(0, sendcount * _send_size);
|
||||
rocpd_process_cpu_usage_events<mpi_recv>(0, recvcount * _recv_size);
|
||||
cache_comm_data_events<mpi_send>(0, sendcount * _send_size);
|
||||
cache_comm_data_events<mpi_recv>(0, recvcount * _recv_size);
|
||||
}
|
||||
|
||||
if(rocprofsys::get_use_timemory())
|
||||
|
||||
@@ -29,6 +29,8 @@
|
||||
#include "core/node_info.hpp"
|
||||
#include "core/perfetto.hpp"
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "core/timemory.hpp"
|
||||
#include "core/trace_cache/cache_manager.hpp"
|
||||
#include "library/components/cpu_freq.hpp"
|
||||
#include "library/thread_info.hpp"
|
||||
|
||||
@@ -91,54 +93,45 @@ get_data_processor()
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_cpu_freq_category()
|
||||
metadata_initialize_cpu_freq_category()
|
||||
{
|
||||
get_data_processor().insert_category(ROCPROFSYS_CATEGORY_CPU_FREQ,
|
||||
trait::name<category::cpu_freq>::value);
|
||||
trace_cache::get_metadata_registry().add_string(
|
||||
trait::name<category::cpu_freq>::value);
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_cpu_freq_tracks()
|
||||
metadata_initialize_cpu_freq_tracks()
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
const auto thread_idx = std::nullopt; // Internal thread ID for cpu-freq
|
||||
|
||||
do_for_enabled_cpus([&](size_t cpu_id) {
|
||||
data_processor.insert_track(
|
||||
get_cpu_freq_track_name<category::cpu_freq>(cpu_id).c_str(), n_info.id,
|
||||
getpid(), thread_idx);
|
||||
trace_cache::get_metadata_registry().add_track(
|
||||
{ get_cpu_freq_track_name<category::cpu_freq>(cpu_id).c_str(), std::nullopt,
|
||||
"{}" });
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_cpu_usage_tracks()
|
||||
metadata_initialize_cpu_usage_tracks()
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
const auto thread_idx = std::nullopt; // Internal thread ID for cpu-freq
|
||||
|
||||
data_processor.insert_track(trait::name<category::process_page>::value, n_info.id,
|
||||
getpid(), thread_idx);
|
||||
data_processor.insert_track(trait::name<category::process_virt>::value, n_info.id,
|
||||
getpid(), thread_idx);
|
||||
data_processor.insert_track(trait::name<category::process_peak>::value, n_info.id,
|
||||
getpid(), thread_idx);
|
||||
data_processor.insert_track(trait::name<category::process_context_switch>::value,
|
||||
n_info.id, getpid(), thread_idx);
|
||||
data_processor.insert_track(trait::name<category::process_page_fault>::value,
|
||||
n_info.id, getpid(), thread_idx);
|
||||
data_processor.insert_track(trait::name<category::process_user_mode_time>::value,
|
||||
n_info.id, getpid(), thread_idx);
|
||||
data_processor.insert_track(trait::name<category::process_kernel_mode_time>::value,
|
||||
n_info.id, getpid(), thread_idx);
|
||||
trace_cache::get_metadata_registry().add_track(
|
||||
{ trait::name<category::process_page>::value, std::nullopt, "{}" });
|
||||
trace_cache::get_metadata_registry().add_track(
|
||||
{ trait::name<category::process_virt>::value, std::nullopt, "{}" });
|
||||
trace_cache::get_metadata_registry().add_track(
|
||||
{ trait::name<category::process_peak>::value, std::nullopt, "{}" });
|
||||
trace_cache::get_metadata_registry().add_track(
|
||||
{ trait::name<category::process_context_switch>::value, std::nullopt, "{}" });
|
||||
trace_cache::get_metadata_registry().add_track(
|
||||
{ trait::name<category::process_page_fault>::value, std::nullopt, "{}" });
|
||||
trace_cache::get_metadata_registry().add_track(
|
||||
{ trait::name<category::process_user_mode_time>::value, std::nullopt, "{}" });
|
||||
trace_cache::get_metadata_registry().add_track(
|
||||
{ trait::name<category::process_kernel_mode_time>::value, std::nullopt, "{}" });
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_cpu_freq_pmc(size_t dev_id)
|
||||
metadata_initialize_cpu_freq_pmc(size_t dev_id)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
// find the proper values for a following definitions
|
||||
// TODO: Find the proper values for a following definitions
|
||||
size_t EVENT_CODE = 0;
|
||||
size_t INSTANCE_ID = 0;
|
||||
const char* LONG_DESCRIPTION = "";
|
||||
@@ -150,58 +143,56 @@ rocpd_initialize_cpu_freq_pmc(size_t dev_id)
|
||||
auto ni = node_info::get_instance();
|
||||
const auto* TARGET_ARCH = "CPU";
|
||||
|
||||
auto& _agent_manager = agent_manager::get_instance();
|
||||
auto base_id = _agent_manager.get_agent_by_id(dev_id, agent_type::CPU).base_id;
|
||||
|
||||
do_for_enabled_cpus([&](size_t cpu_id) {
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
get_cpu_freq_track_name<category::cpu_freq>(cpu_id).c_str(), "Frequency",
|
||||
trait::name<category::cpu_freq>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
component::cpu_freq::display_unit().c_str(), "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
trace_cache::get_metadata_registry().add_pmc_info(
|
||||
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
get_cpu_freq_track_name<category::cpu_freq>(cpu_id).c_str(), "Frequency",
|
||||
trait::name<category::cpu_freq>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
component::cpu_freq::display_unit().c_str(),
|
||||
rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
|
||||
});
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_page>::value, "Memory Usage",
|
||||
trait::name<category::process_page>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
MEMORY, "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
trace_cache::get_metadata_registry().add_pmc_info(
|
||||
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_page>::value, "Memory Usage",
|
||||
trait::name<category::process_page>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
MEMORY, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_virt>::value, "Virtual Memory Usage",
|
||||
trait::name<category::process_virt>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
MEMORY, "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
trace_cache::get_metadata_registry().add_pmc_info(
|
||||
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_virt>::value, "Virtual Memory Usage",
|
||||
trait::name<category::process_virt>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
MEMORY, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_peak>::value, "Peak Memory",
|
||||
trait::name<category::process_peak>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
MEMORY, "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
trace_cache::get_metadata_registry().add_pmc_info(
|
||||
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_peak>::value, "Peak Memory",
|
||||
trait::name<category::process_peak>::description, LONG_DESCRIPTION, COMPONENT,
|
||||
MEMORY, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_context_switch>::value, "Context Switches",
|
||||
trait::name<category::process_context_switch>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, "", "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
trace_cache::get_metadata_registry().add_pmc_info(
|
||||
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_context_switch>::value, "Context Switches",
|
||||
trait::name<category::process_context_switch>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, "", rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_page_fault>::value, "Page Faults",
|
||||
trait::name<category::process_page_fault>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, "", "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
trace_cache::get_metadata_registry().add_pmc_info(
|
||||
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_page_fault>::value, "Page Faults",
|
||||
trait::name<category::process_page_fault>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, "", rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_user_mode_time>::value, "User Time",
|
||||
trait::name<category::process_user_mode_time>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, TIME, "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
trace_cache::get_metadata_registry().add_pmc_info(
|
||||
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_user_mode_time>::value, "User Time",
|
||||
trait::name<category::process_user_mode_time>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, TIME, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
|
||||
|
||||
data_processor.insert_pmc_description(
|
||||
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_kernel_mode_time>::value, "Kernel Time",
|
||||
trait::name<category::process_kernel_mode_time>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, TIME, "ABS", BLOCK, EXPRESSION, 0, 0);
|
||||
trace_cache::get_metadata_registry().add_pmc_info(
|
||||
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
|
||||
trait::name<category::process_kernel_mode_time>::value, "Kernel Time",
|
||||
trait::name<category::process_kernel_mode_time>::description, LONG_DESCRIPTION,
|
||||
COMPONENT, TIME, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
|
||||
}
|
||||
|
||||
void
|
||||
@@ -211,11 +202,13 @@ rocpd_process_cpu_usage_events(const uint32_t device_id, uint64_t timestamp,
|
||||
double context_switch, double page_fault, double user_time,
|
||||
double kernel_time)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto event_id = data_processor.insert_event(ROCPROFSYS_CATEGORY_CPU_FREQ, 0, 0, 0);
|
||||
auto& data_processor = get_data_processor();
|
||||
const auto* _name = trait::name<category::cpu_freq>::value;
|
||||
auto name_primary_key = data_processor.insert_string(_name);
|
||||
auto event_id = data_processor.insert_event(name_primary_key, 0, 0, 0);
|
||||
|
||||
auto& agent_mngr = agent_manager::get_instance();
|
||||
auto base_id = agent_mngr.get_agent_by_id(device_id, agent_type::CPU).base_id;
|
||||
auto base_id = agent_mngr.get_agent_by_type_index(device_id, agent_type::CPU).base_id;
|
||||
|
||||
auto insert_event_and_sample = [&](const char* name, double value) {
|
||||
data_processor.insert_pmc_event(event_id, base_id, name, value);
|
||||
@@ -258,6 +251,21 @@ setup()
|
||||
category::process_page_fault, category::process_user_mode_time,
|
||||
category::process_kernel_mode_time>{});
|
||||
}
|
||||
|
||||
metadata_initialize_cpu_freq_category();
|
||||
metadata_initialize_cpu_usage_tracks();
|
||||
metadata_initialize_cpu_freq_tracks();
|
||||
|
||||
// `get_enabled_cpus()` returns the number of cores enabled for monitoring but
|
||||
// the actual device_id is 0, since there is a single device available. And
|
||||
// the agents seems to be assigned per device basis not per core.
|
||||
// TODO: `get_enabled_cpus()` should be fixed in the future to align with GPU
|
||||
// implementation.
|
||||
auto cpu_agents = agent_manager::get_instance().get_agents_by_type(agent_type::CPU);
|
||||
for(auto& agent : cpu_agents)
|
||||
{
|
||||
metadata_initialize_cpu_freq_pmc(agent->device_id);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@@ -343,25 +351,6 @@ post_process()
|
||||
|
||||
auto& enabled_cpus = component::cpu_freq::get_enabled_cpus();
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_cpu_freq_category();
|
||||
rocpd_initialize_cpu_usage_tracks();
|
||||
rocpd_initialize_cpu_freq_tracks();
|
||||
|
||||
// `get_enabled_cpus()` returns the number of cores enabled for monitoring but the
|
||||
// actually device_id is 0, since there is a single device available. And the
|
||||
// agents seems to be assigned per device basis not per core.
|
||||
// TODO: `get_enabled_cpus()` should be fixed in the future to align with GPU
|
||||
// implementation.
|
||||
auto cpu_agents =
|
||||
agent_manager::get_instance().get_agents_by_type(agent_type::CPU);
|
||||
for(auto& agent : cpu_agents)
|
||||
{
|
||||
rocpd_initialize_cpu_freq_pmc(agent->device_id);
|
||||
}
|
||||
}
|
||||
|
||||
auto _process_frequencies = [](size_t _idx, size_t _offset) {
|
||||
using freq_track = perfetto_counter_track<category::cpu_freq>;
|
||||
|
||||
|
||||
@@ -31,10 +31,12 @@
|
||||
#include "core/defines.hpp"
|
||||
#include "core/node_info.hpp"
|
||||
#include "core/perfetto.hpp"
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "core/rocpd/json.hpp"
|
||||
#include "core/trace_cache/cache_manager.hpp"
|
||||
#include "core/trace_cache/sample_type.hpp"
|
||||
#include "library/components/category_region.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
#include <optional>
|
||||
|
||||
#include <timemory/api/kokkosp.hpp>
|
||||
#include <timemory/backends/process.hpp>
|
||||
@@ -157,47 +159,41 @@ violates_name_rules(Arg&& _arg, Args&&... _args)
|
||||
|
||||
namespace
|
||||
{
|
||||
rocprofsys::rocpd::data_processor&
|
||||
get_data_processor()
|
||||
{
|
||||
return rocprofsys::rocpd::data_processor::get_instance();
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_kokkos_category()
|
||||
metadata_initialize_kokkos_category()
|
||||
{
|
||||
get_data_processor().insert_category(
|
||||
rocprofsys::category_enum_id<category::kokkos>::value,
|
||||
rocprofsys::trace_cache::get_metadata_registry().add_string(
|
||||
rocprofsys::trait::name<category::kokkos>::value);
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_kokkos_track()
|
||||
metadata_initialize_kokkos_track()
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = rocprofsys::node_info::get_instance();
|
||||
auto thread_id = std::nullopt;
|
||||
|
||||
data_processor.insert_track(rocprofsys::trait::name<category::kokkos>::value,
|
||||
n_info.id, getpid(), thread_id);
|
||||
rocprofsys::trace_cache::get_metadata_registry().add_track(
|
||||
{ rocprofsys::trait::name<category::kokkos>::value, std::nullopt, "{}" });
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_process_kokkos_event(const char* name, const char* event_type, const char* target,
|
||||
uint64_t timestamp_ns)
|
||||
cache_kokkos_event(const char* name, const char* event_type, const char* target,
|
||||
uint64_t timestamp_ns)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto event_metadata = rocpd::json::create();
|
||||
auto event_metadata = rocpd::json::create();
|
||||
|
||||
event_metadata->set("name", name);
|
||||
event_metadata->set("event_type", event_type);
|
||||
event_metadata->set("target", target);
|
||||
|
||||
auto event_id = data_processor.insert_event(
|
||||
rocprofsys::category_enum_id<category::kokkos>::value, 0, 0, 0, "{}", "{}",
|
||||
event_metadata->to_string().c_str());
|
||||
data_processor.insert_sample(rocprofsys::trait::name<category::kokkos>::value,
|
||||
timestamp_ns, event_id, "{}");
|
||||
const size_t stack_id = 0;
|
||||
const size_t parent_stack_id = 0;
|
||||
const size_t correlation_id = 0;
|
||||
const char* call_stack = "{}";
|
||||
const char* line_info = "{}";
|
||||
|
||||
rocprofsys::trace_cache::get_buffer_storage().store(
|
||||
rocprofsys::trace_cache::entry_type::in_time_sample,
|
||||
rocprofsys::trait::name<category::kokkos>::value, timestamp_ns,
|
||||
event_metadata->to_string().c_str(), stack_id, parent_stack_id, correlation_id,
|
||||
call_stack, line_info);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@@ -308,11 +304,8 @@ extern "C"
|
||||
rocprofsys_init_hidden(_mode.c_str(), false, _arg0.c_str());
|
||||
rocprofsys_push_trace_hidden("kokkos_main");
|
||||
|
||||
if(rocprofsys::get_use_rocpd())
|
||||
{
|
||||
rocpd_initialize_kokkos_category();
|
||||
rocpd_initialize_kokkos_track();
|
||||
}
|
||||
metadata_initialize_kokkos_category();
|
||||
metadata_initialize_kokkos_track();
|
||||
}
|
||||
|
||||
setup_kernel_logger();
|
||||
@@ -619,12 +612,8 @@ extern "C"
|
||||
kokkosp::profiler_t<kokkosp_region>{ _name }.mark();
|
||||
}
|
||||
|
||||
if(rocprofsys::config::get_use_rocpd())
|
||||
{
|
||||
rocpd_process_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(),
|
||||
"[dual_view_sync]",
|
||||
(is_device) ? "device" : "host", timestamp);
|
||||
}
|
||||
cache_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(), "[dual_view_sync]",
|
||||
(is_device) ? "device" : "host", timestamp);
|
||||
}
|
||||
|
||||
void kokkosp_dual_view_modify(const char* label, const void* const, bool is_device)
|
||||
@@ -648,12 +637,8 @@ extern "C"
|
||||
kokkosp::profiler_t<kokkosp_region>{ _name }.mark();
|
||||
}
|
||||
|
||||
if(rocprofsys::config::get_use_rocpd())
|
||||
{
|
||||
rocpd_process_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(),
|
||||
"[dual_view_modify]",
|
||||
(is_device) ? "device" : "host", timestamp);
|
||||
}
|
||||
cache_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(), "[dual_view_modify]",
|
||||
(is_device) ? "device" : "host", timestamp);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------//
|
||||
|
||||
+493
-56
@@ -20,24 +20,31 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "library/rocprofiler-sdk.hpp"
|
||||
#include "core/rocprofiler-sdk.hpp"
|
||||
#include "api.hpp"
|
||||
#include "common/synchronized.hpp"
|
||||
#include "core/common.hpp"
|
||||
#include "core/config.hpp"
|
||||
#include "core/containers/stable_vector.hpp"
|
||||
#include "core/debug.hpp"
|
||||
#include "core/gpu.hpp"
|
||||
#include "core/perfetto.hpp"
|
||||
#include "core/rocprofiler-sdk.hpp"
|
||||
#include "core/rocpd/json.hpp"
|
||||
#include "core/state.hpp"
|
||||
#include "core/trace_cache/buffer_storage.hpp"
|
||||
#include "core/trace_cache/cache_manager.hpp"
|
||||
#include "core/trace_cache/metadata_registry.hpp"
|
||||
#include "core/trace_cache/sample_type.hpp"
|
||||
#include "library/amd_smi.hpp"
|
||||
#include "library/components/category_region.hpp"
|
||||
#include "library/rocprofiler-sdk.hpp"
|
||||
#include "library/rocprofiler-sdk/counters.hpp"
|
||||
#include "library/rocprofiler-sdk/fwd.hpp"
|
||||
#include "library/rocprofiler-sdk/rccl.hpp"
|
||||
#include "library/thread_info.hpp"
|
||||
#include "library/tracing.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <timemory/components/timing/wall_clock.hpp>
|
||||
#include <timemory/hash/types.hpp>
|
||||
#include <timemory/unwind/processed_entry.hpp>
|
||||
@@ -247,7 +254,7 @@ create_agent_profile(rocprofiler_agent_id_t agent_id,
|
||||
ROCPROFSYS_ABORT_F(
|
||||
"Unable to find all counters for agent %i (gpu-%li, %s) in %s. Found: %s\n",
|
||||
tool_agent_v->agent->node_id, tool_agent_v->device_id,
|
||||
tool_agent_v->agent->name, requested_counters.c_str(),
|
||||
tool_agent_v->agent->name.c_str(), requested_counters.c_str(),
|
||||
found_counters.c_str());
|
||||
}
|
||||
|
||||
@@ -270,6 +277,12 @@ get_kernel_symbol_info(uint64_t _kernel_id)
|
||||
return tool_data->get_kernel_symbol_info(_kernel_id);
|
||||
}
|
||||
|
||||
const rocprofiler_callback_tracing_code_object_load_data_t*
|
||||
get_code_object_info(uint64_t _code_object_id)
|
||||
{
|
||||
return tool_data->get_code_object_info(_code_object_id);
|
||||
}
|
||||
|
||||
// Implementation of rocprofiler_callback_tracing_operation_args_cb_t
|
||||
int
|
||||
save_args(rocprofiler_callback_tracing_kind_t /*kind*/, int32_t /*operation*/,
|
||||
@@ -283,6 +296,22 @@ save_args(rocprofiler_callback_tracing_kind_t /*kind*/, int32_t /*operation*/,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Additional implementation of rocprofiler_callback_tracing_operation_args_cb_t
|
||||
// for iterating through arguments in a callback for rocpd_arg table in database
|
||||
int
|
||||
iterate_args_callback(rocprofiler_callback_tracing_kind_t /*kind*/, int32_t /*operation*/,
|
||||
uint32_t arg_number, const void* const /*arg_value_addr*/,
|
||||
int32_t /*arg_indirection_count*/, const char* arg_type,
|
||||
const char* arg_name, const char* arg_value_str,
|
||||
int32_t /*arg_dereference_count*/, void* data)
|
||||
{
|
||||
auto* _data = static_cast<function_args_t*>(data);
|
||||
if(arg_type && arg_name && arg_value_str)
|
||||
_data->emplace_back(
|
||||
argument_info{ arg_number, demangle(arg_type), arg_name, arg_value_str });
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto&
|
||||
get_marker_pushed_ranges()
|
||||
{
|
||||
@@ -299,6 +328,294 @@ get_marker_started_ranges()
|
||||
return _v;
|
||||
}
|
||||
|
||||
template <typename Tp, typename... Args>
|
||||
Tp*
|
||||
as_pointer(Args&&... _args)
|
||||
{
|
||||
return new Tp{ std::forward<Args>(_args)... };
|
||||
}
|
||||
|
||||
template <typename... Tp>
|
||||
void
|
||||
consume_args(Tp&&...)
|
||||
{}
|
||||
|
||||
auto
|
||||
get_backtrace(std::optional<std::vector<tim::unwind::processed_entry>>& _bt_data)
|
||||
{
|
||||
auto backtrace = ::rocpd::json::create();
|
||||
if(_bt_data && !_bt_data->empty())
|
||||
{
|
||||
const std::string _unk = "??";
|
||||
size_t _bt_cnt = 0;
|
||||
for(const auto& itr : *_bt_data)
|
||||
{
|
||||
auto _linfo = itr.lineinfo.get();
|
||||
const auto* _func = (itr.name.empty()) ? &_unk : &itr.name;
|
||||
const auto* _loc = (_linfo && !_linfo.location.empty())
|
||||
? &_linfo.location
|
||||
: ((itr.location.empty()) ? &_unk : &itr.location);
|
||||
auto _line =
|
||||
(_linfo && _linfo.line > 0)
|
||||
? join("", _linfo.line)
|
||||
: ((itr.lineno == 0) ? std::string{ "?" } : join("", itr.lineno));
|
||||
auto _entry = join("", demangle(*_func), " @ ",
|
||||
join(':', ::basename(_loc->c_str()), _line));
|
||||
backtrace->set(join("", "frame#", _bt_cnt++), _entry);
|
||||
}
|
||||
}
|
||||
return backtrace;
|
||||
}
|
||||
|
||||
template <typename CorrelationIdType>
|
||||
uint64_t
|
||||
get_parent_stack_id([[maybe_unused]] const CorrelationIdType& correlation_id)
|
||||
{
|
||||
#if(ROCPROFILER_VERSION >= 700)
|
||||
if constexpr(std::is_same_v<rocprofiler_correlation_id_t, CorrelationIdType>)
|
||||
{
|
||||
return correlation_id.ancestor;
|
||||
}
|
||||
else
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
auto
|
||||
get_extdata(const rocprofiler_callback_tracing_record_t& record)
|
||||
{
|
||||
constexpr auto message_key = "message";
|
||||
auto args = callback_arg_array_t{};
|
||||
auto extdata = ::rocpd::json::create();
|
||||
|
||||
rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, 2, &args);
|
||||
|
||||
for(auto [key, val] : args)
|
||||
{
|
||||
if(key == message_key)
|
||||
{
|
||||
extdata->set(key, val);
|
||||
}
|
||||
}
|
||||
|
||||
return extdata;
|
||||
}
|
||||
|
||||
struct scope_destructor
|
||||
{
|
||||
/// \fn scope_destructor(FuncT&& _fini, InitT&& _init)
|
||||
/// \tparam FuncT "std::function<void()> or void (*)()"
|
||||
/// \tparam InitT "std::function<void()> or void (*)()"
|
||||
/// \param _fini Function to execute when object is destroyed
|
||||
/// \param _init Function to execute when object is created (optional)
|
||||
///
|
||||
/// \brief Provides a utility to perform an operation when exiting a scope.
|
||||
template <typename FuncT, typename InitT = void (*)()>
|
||||
scope_destructor(FuncT&& _fini, InitT&& _init = []() {});
|
||||
|
||||
~scope_destructor() { m_functor(); }
|
||||
|
||||
// delete copy operations
|
||||
scope_destructor(const scope_destructor&) = delete;
|
||||
scope_destructor& operator=(const scope_destructor&) = delete;
|
||||
|
||||
// allow move operations
|
||||
scope_destructor(scope_destructor&& rhs) noexcept;
|
||||
scope_destructor& operator=(scope_destructor&& rhs) noexcept;
|
||||
|
||||
private:
|
||||
std::function<void()> m_functor = []() {};
|
||||
};
|
||||
|
||||
template <typename FuncT, typename InitT>
|
||||
scope_destructor::scope_destructor(FuncT&& _fini, InitT&& _init)
|
||||
: m_functor{ std::forward<FuncT>(_fini) }
|
||||
{
|
||||
_init();
|
||||
}
|
||||
|
||||
inline scope_destructor::scope_destructor(scope_destructor&& rhs) noexcept
|
||||
: m_functor{ std::move(rhs.m_functor) }
|
||||
{
|
||||
rhs.m_functor = []() {};
|
||||
}
|
||||
|
||||
inline scope_destructor&
|
||||
scope_destructor::operator=(scope_destructor&& rhs) noexcept
|
||||
{
|
||||
if(this != &rhs)
|
||||
{
|
||||
m_functor = std::move(rhs.m_functor);
|
||||
rhs.m_functor = []() {};
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
using kernel_rename_stack_t = std::stack<uint64_t>;
|
||||
|
||||
thread_local auto thread_dispatch_rename = as_pointer<kernel_rename_stack_t>();
|
||||
thread_local auto thread_dispatch_rename_dtor = scope_destructor{ []() {
|
||||
delete thread_dispatch_rename;
|
||||
thread_dispatch_rename = nullptr;
|
||||
} };
|
||||
|
||||
template <typename Category>
|
||||
void
|
||||
cache_category()
|
||||
{
|
||||
trace_cache::get_metadata_registry().add_string(trait::name<Category>::value);
|
||||
}
|
||||
|
||||
void
|
||||
cache_add_thread_info(uint64_t tid)
|
||||
{
|
||||
trace_cache::get_metadata_registry().add_thread_info(
|
||||
{ getppid(), getpid(), tid, 0, 0, "{}" });
|
||||
}
|
||||
|
||||
void
|
||||
cache_add_track(const char* track_name, uint64_t tid)
|
||||
{
|
||||
trace_cache::get_metadata_registry().add_track({ track_name, tid, "{}" });
|
||||
}
|
||||
|
||||
size_t
|
||||
get_mem_copy_dst_address(
|
||||
[[maybe_unused]] const rocprofiler_buffer_tracing_memory_copy_record_t& record)
|
||||
{
|
||||
#if(ROCPROFILER_VERSION >= 700)
|
||||
return record.dst_address.value;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t
|
||||
get_mem_copy_src_address(
|
||||
[[maybe_unused]] const rocprofiler_buffer_tracing_memory_copy_record_t& record)
|
||||
{
|
||||
#if(ROCPROFILER_VERSION >= 700)
|
||||
return record.src_address.value;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
size_t
|
||||
get_mem_alloc_address(
|
||||
[[maybe_unused]] const rocprofiler_buffer_tracing_memory_allocation_record_t& record)
|
||||
{
|
||||
# if(ROCPROFILER_VERSION >= 700)
|
||||
return record.address.value;
|
||||
# else
|
||||
return static_cast<size_t>(record.address.handle);
|
||||
# endif
|
||||
}
|
||||
#endif
|
||||
|
||||
// clang-format off
|
||||
void
|
||||
cache_region(const rocprofiler_callback_tracing_record_t* record,
|
||||
const rocprofiler_timestamp_t start_timestamp,
|
||||
const rocprofiler_timestamp_t end_timestamp, const std::string& call_stack,
|
||||
const std::string& args_str, const std::string& category)
|
||||
|
||||
{
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::region,
|
||||
record->thread_id,
|
||||
static_cast<int32_t>(record->kind),
|
||||
static_cast<int32_t>(record->operation),
|
||||
record->correlation_id.internal,
|
||||
get_parent_stack_id(record->correlation_id),
|
||||
start_timestamp,
|
||||
end_timestamp,
|
||||
call_stack.c_str(),
|
||||
args_str.c_str(),
|
||||
category.c_str());
|
||||
}
|
||||
|
||||
void
|
||||
cache_kernel_dispatch(rocprofiler_buffer_tracing_kernel_dispatch_record_t* record)
|
||||
{
|
||||
auto stream_handle = get_stream_id(record).handle;
|
||||
auto queue_handle = record->dispatch_info.queue_id.handle;
|
||||
|
||||
trace_cache::get_metadata_registry().add_queue(queue_handle);
|
||||
trace_cache::get_metadata_registry().add_stream(stream_handle);
|
||||
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::kernel_dispatch,
|
||||
record->start_timestamp,
|
||||
record->end_timestamp,
|
||||
record->thread_id,
|
||||
record->dispatch_info.agent_id.handle,
|
||||
record->dispatch_info.kernel_id,
|
||||
record->dispatch_info.dispatch_id,
|
||||
record->dispatch_info.queue_id.handle,
|
||||
record->correlation_id.internal,
|
||||
get_parent_stack_id(record->correlation_id),
|
||||
record->dispatch_info.private_segment_size,
|
||||
record->dispatch_info.group_segment_size,
|
||||
record->dispatch_info.workgroup_size.x,
|
||||
record->dispatch_info.workgroup_size.y,
|
||||
record->dispatch_info.workgroup_size.z,
|
||||
record->dispatch_info.grid_size.x,
|
||||
record->dispatch_info.grid_size.y,
|
||||
record->dispatch_info.grid_size.z,
|
||||
stream_handle);
|
||||
}
|
||||
|
||||
void
|
||||
cache_memory_copy(rocprofiler_buffer_tracing_memory_copy_record_t* record)
|
||||
{
|
||||
auto stream_handle = get_stream_id(record).handle;
|
||||
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::memory_copy,
|
||||
record->start_timestamp,
|
||||
record->end_timestamp,
|
||||
record->thread_id,
|
||||
record->dst_agent_id.handle,
|
||||
record->src_agent_id.handle,
|
||||
static_cast<int32_t>(record->kind),
|
||||
static_cast<int32_t>(record->operation),
|
||||
record->bytes,
|
||||
record->correlation_id.internal,
|
||||
get_parent_stack_id(record->correlation_id),
|
||||
get_mem_copy_dst_address(*record),
|
||||
get_mem_copy_src_address(*record),
|
||||
stream_handle);
|
||||
}
|
||||
|
||||
#if (ROCPROFILER_VERSION >= 600)
|
||||
void
|
||||
cache_memory_allocation(rocprofiler_buffer_tracing_memory_allocation_record_t* record)
|
||||
{
|
||||
auto stream_handle = get_stream_id(record).handle;
|
||||
|
||||
trace_cache::get_metadata_registry().add_stream(stream_handle);
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::memory_alloc,
|
||||
record->start_timestamp,
|
||||
record->end_timestamp,
|
||||
record->thread_id,
|
||||
record->agent_id.handle,
|
||||
static_cast<int32_t>(record->kind),
|
||||
static_cast<int32_t>(record->operation),
|
||||
record->allocation_size,
|
||||
record->correlation_id.internal,
|
||||
get_parent_stack_id(record->correlation_id),
|
||||
get_mem_alloc_address(*record),
|
||||
stream_handle);
|
||||
}
|
||||
#endif
|
||||
// clang-format on
|
||||
template <typename CategoryT>
|
||||
void
|
||||
tool_tracing_callback_start(CategoryT, rocprofiler_callback_tracing_record_t record,
|
||||
@@ -391,7 +708,8 @@ tool_tracing_callback_stop(
|
||||
{
|
||||
ROCPROFSYS_CONDITIONAL_ABORT_F(
|
||||
get_marker_started_ranges().empty(),
|
||||
"roctxRangeStop does not have corresponding roctxRangeStart on "
|
||||
"roctxRangeStop does not have corresponding roctxRangeStart "
|
||||
"on "
|
||||
"this thread");
|
||||
|
||||
auto _hash = get_marker_started_ranges().back().first;
|
||||
@@ -473,8 +791,9 @@ tool_tracing_callback_stop(
|
||||
join(':', ::basename(_loc->c_str()), _line));
|
||||
if(_bt_cnt < 10)
|
||||
{
|
||||
// Prepend zero for better ordering in UI. Only one zero
|
||||
// is ever necessary since stack depth is limited to 16.
|
||||
// Prepend zero for better ordering in UI. Only one
|
||||
// zero is ever necessary since stack depth is limited
|
||||
// to 16.
|
||||
tracing::add_perfetto_annotation(
|
||||
ctx, join("", "frame#0", _bt_cnt++), _entry);
|
||||
}
|
||||
@@ -493,6 +812,33 @@ tool_tracing_callback_stop(
|
||||
tracing::add_perfetto_annotation(ctx, "end_ns", _end_ts);
|
||||
});
|
||||
}
|
||||
|
||||
// Insert callback trace into database
|
||||
auto args = function_args_t{};
|
||||
|
||||
rocprofiler_iterate_callback_tracing_kind_operation_args(
|
||||
record, iterate_args_callback, 2, &args);
|
||||
|
||||
auto call_stack = get_backtrace(_bt_data);
|
||||
uint64_t _beg_ts = user_data->value;
|
||||
uint64_t _end_ts = ts;
|
||||
|
||||
{
|
||||
cache_category<CategoryT>();
|
||||
cache_add_thread_info(record.thread_id);
|
||||
std::string args_str;
|
||||
|
||||
std::for_each(args.begin(), args.end(), [&args_str](const argument_info& arg) {
|
||||
const auto* delimiter = ";;";
|
||||
std::stringstream ss;
|
||||
ss << arg.arg_number << delimiter << arg.arg_type << delimiter << arg.arg_name
|
||||
<< delimiter << arg.arg_value << delimiter;
|
||||
args_str.append(ss.str());
|
||||
});
|
||||
|
||||
cache_region(&record, _beg_ts, _end_ts, call_stack->to_string(), args_str,
|
||||
trait::name<CategoryT>::value);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@@ -535,6 +881,7 @@ tool_code_object_callback(rocprofiler_callback_tracing_record_t record,
|
||||
_data.emplace_back(
|
||||
code_object_callback_record_t{ ts, record, data_v });
|
||||
});
|
||||
trace_cache::get_metadata_registry().add_code_object(data_v);
|
||||
}
|
||||
else if(record.operation ==
|
||||
ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER)
|
||||
@@ -545,6 +892,7 @@ tool_code_object_callback(rocprofiler_callback_tracing_record_t record,
|
||||
_data.emplace_back(
|
||||
new kernel_symbol_callback_record_t{ ts, record, data_v });
|
||||
});
|
||||
trace_cache::get_metadata_registry().add_kernel_symbol(data_v);
|
||||
}
|
||||
}
|
||||
return;
|
||||
@@ -635,7 +983,6 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY:
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
case ROCPROFILER_CALLBACK_TRACING_OMPT:
|
||||
case ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION:
|
||||
case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION:
|
||||
#endif
|
||||
#if(ROCPROFILER_VERSION >= 700)
|
||||
@@ -646,6 +993,12 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
record.kind);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
ROCPROFSYS_CI_ABORT(true, "Unhandled callback record: \n\t%s\n",
|
||||
info.str().c_str());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
|
||||
@@ -657,8 +1010,11 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
constexpr bool bt_with_signal_frame = true;
|
||||
|
||||
auto _bt_data = std::optional<backtrace_entry_vec_t>{};
|
||||
auto use_perfetto =
|
||||
(config::get_use_perfetto() && config::get_perfetto_annotations());
|
||||
auto use_rocpd = config::get_use_rocpd();
|
||||
|
||||
if(config::get_use_perfetto() && config::get_perfetto_annotations() &&
|
||||
if((use_perfetto || use_rocpd) &&
|
||||
tool_data->backtrace_operations.at(record.kind).count(record.operation) > 0)
|
||||
{
|
||||
auto _backtrace = tim::get_unw_stack<bt_stack_depth, bt_ignore_depth,
|
||||
@@ -735,7 +1091,6 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY:
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
case ROCPROFILER_CALLBACK_TRACING_OMPT:
|
||||
case ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION:
|
||||
case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION:
|
||||
#endif
|
||||
#if(ROCPROFILER_VERSION >= 700)
|
||||
@@ -746,27 +1101,41 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
record.kind);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
ROCPROFSYS_CI_ABORT(true, "Unhandled callback record\n\t%s\n",
|
||||
info.str().c_str());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_NONE)
|
||||
{
|
||||
if(record.kind == ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH &&
|
||||
record.operation == ROCPROFILER_KERNEL_DISPATCH_COMPLETE)
|
||||
switch(record.kind)
|
||||
{
|
||||
auto* _data =
|
||||
static_cast<rocprofiler_callback_tracing_kernel_dispatch_data_t*>(
|
||||
record.payload);
|
||||
case ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH:
|
||||
{
|
||||
if(record.operation == ROCPROFILER_KERNEL_DISPATCH_COMPLETE)
|
||||
{
|
||||
auto* _data =
|
||||
static_cast<rocprofiler_callback_tracing_kernel_dispatch_data_t*>(
|
||||
record.payload);
|
||||
|
||||
// save for post-processing
|
||||
get_kernel_dispatch_timestamps().emplace(
|
||||
_data->dispatch_info.dispatch_id,
|
||||
timing_interval{ _data->start_timestamp, _data->end_timestamp });
|
||||
}
|
||||
else
|
||||
{
|
||||
ROCPROFSYS_WARNING_F(
|
||||
1, "tool_tracing_callback: unhandled PHASE_NONE callback record\n\t%s\n",
|
||||
info.str().c_str());
|
||||
// save for post-processing
|
||||
get_kernel_dispatch_timestamps().emplace(
|
||||
_data->dispatch_info.dispatch_id,
|
||||
timing_interval{ _data->start_timestamp, _data->end_timestamp });
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
{
|
||||
ROCPROFSYS_WARNING_F(1,
|
||||
"tool_tracing_callback: unhandled PHASE_NONE "
|
||||
"callback record\n\t%s\n",
|
||||
info.str().c_str());
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -820,10 +1189,20 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
|
||||
uint64_t _stream_id = get_stream_id(record).handle;
|
||||
if(_stream_id == 0)
|
||||
{
|
||||
// kernel is not associated with a HIP stream
|
||||
// kernel_dispatch is not associated with a HIP stream
|
||||
_group_by_queue = true;
|
||||
}
|
||||
|
||||
{
|
||||
cache_category<category::rocm_kernel_dispatch>();
|
||||
cache_add_thread_info(record->thread_id);
|
||||
cache_add_track(JOIN("", "GPU Kernel Dispatch [", _agent->device_id,
|
||||
"] Queue ", _queue_id.handle)
|
||||
.c_str(),
|
||||
record->thread_id);
|
||||
cache_kernel_dispatch(record);
|
||||
}
|
||||
|
||||
if(get_use_timemory())
|
||||
{
|
||||
const auto& _tinfo = thread_info::get(record->thread_id, SystemTID);
|
||||
@@ -937,6 +1316,20 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
|
||||
_group_by_queue = true;
|
||||
}
|
||||
|
||||
{
|
||||
size_t thread_idx = record->thread_id;
|
||||
std::string track_name;
|
||||
|
||||
track_name =
|
||||
JOIN("", "GPU Memory Copy to Agent [",
|
||||
_dst_agent->logical_node_id, "] Thread ", thread_idx);
|
||||
|
||||
cache_category<category::rocm_memory_copy>();
|
||||
cache_add_track(track_name.c_str(), record->thread_id);
|
||||
|
||||
cache_memory_copy(record);
|
||||
}
|
||||
|
||||
if(get_use_timemory())
|
||||
{
|
||||
const auto& _tinfo = thread_info::get(record->thread_id, SystemTID);
|
||||
@@ -1005,6 +1398,26 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
|
||||
}
|
||||
}
|
||||
}
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
else if(header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION)
|
||||
{
|
||||
auto* record =
|
||||
static_cast<rocprofiler_buffer_tracing_memory_allocation_record_t*>(
|
||||
header->payload);
|
||||
|
||||
{
|
||||
cache_category<category::rocm_memory_allocate>();
|
||||
cache_add_thread_info(record->thread_id);
|
||||
cache_memory_allocation(record);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
else if(header->kind == ROCPROFILER_BUFFER_TRACING_HSA_CORE_API ||
|
||||
header->kind == ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API)
|
||||
{
|
||||
// Not handling those buffered events
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
ROCPROFSYS_THROW(
|
||||
@@ -1098,10 +1511,10 @@ counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_da
|
||||
ROCPROFSYS_CONDITIONAL_ABORT_F(
|
||||
!_agent, "unable to find tool agent for agent (id=%zu)\n",
|
||||
_agent_id.handle);
|
||||
ROCPROFSYS_CONDITIONAL_ABORT_F(
|
||||
!_info,
|
||||
"unable to find counter info for counter (id=%zu) on agent (id=%zu)\n",
|
||||
itr.first.handle, _agent_id.handle);
|
||||
ROCPROFSYS_CONDITIONAL_ABORT_F(!_info,
|
||||
"unable to find counter info for counter "
|
||||
"(id=%zu) on agent (id=%zu)\n",
|
||||
itr.first.handle, _agent_id.handle);
|
||||
|
||||
auto _dev_id = static_cast<uint32_t>(_agent->device_id);
|
||||
|
||||
@@ -1159,7 +1572,6 @@ void
|
||||
flush()
|
||||
{
|
||||
if(!tool_data) return;
|
||||
|
||||
for(auto itr : tool_data->get_buffers())
|
||||
{
|
||||
if(itr.handle > 0)
|
||||
@@ -1268,6 +1680,24 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
|
||||
_data->primary_ctx, ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, nullptr, 0,
|
||||
tool_code_object_callback, _data));
|
||||
|
||||
auto external_corr_id_request_kinds =
|
||||
std::array<rocprofiler_external_correlation_id_request_kind_t, 3>{
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH,
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY,
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_ALLOCATION
|
||||
#endif
|
||||
};
|
||||
|
||||
// Insert the default stream and queue info to ensure that the default entry is
|
||||
{
|
||||
trace_cache::get_metadata_registry().add_stream(0);
|
||||
trace_cache::get_metadata_registry().add_queue(0);
|
||||
}
|
||||
// ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
|
||||
// _data->primary_ctx, ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, nullptr, 0,
|
||||
// tool_code_object_callback, _data));
|
||||
|
||||
for(auto itr : {
|
||||
ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API,
|
||||
@@ -1296,17 +1726,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
|
||||
}
|
||||
}
|
||||
|
||||
constexpr auto buffer_size = 8192;
|
||||
constexpr auto watermark = 7936;
|
||||
|
||||
// Configure external correlation id request service for kernel dispatch
|
||||
// and memory copy.
|
||||
|
||||
auto external_corr_id_request_kinds =
|
||||
std::array<rocprofiler_external_correlation_id_request_kind_t, 2>{
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH,
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY
|
||||
};
|
||||
constexpr auto buffer_size = 16 * 4096;
|
||||
constexpr auto watermark = 15 * 4096;
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_configure_external_correlation_id_request_service(
|
||||
_data->primary_ctx, external_corr_id_request_kinds.data(),
|
||||
@@ -1334,7 +1755,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
|
||||
_data->primary_ctx, ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, nullptr, 0,
|
||||
_data->kernel_dispatch_buffer));
|
||||
}
|
||||
|
||||
// ROCPROFILER_BUFFER_TRACING_HSA_CORE_API, ///< @see
|
||||
// ::rocprofiler_hsa_core_api_id_t ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API,
|
||||
if(_buffered_domain.count(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY) > 0)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(
|
||||
@@ -1347,19 +1769,39 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
|
||||
_data->memory_copy_buffer));
|
||||
}
|
||||
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
if(_buffered_domain.count(ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION) > 0)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(
|
||||
_data->primary_ctx, buffer_size, watermark,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS, tool_tracing_buffered, tool_data,
|
||||
&_data->memory_alloc_buffer));
|
||||
if(_data->memory_alloc_buffer.handle == 0UL)
|
||||
{
|
||||
ROCPROFSYS_CI_ABORT(true, "Failed to create memory allocation buffer\n");
|
||||
}
|
||||
auto _ops =
|
||||
rocprofiler_sdk::get_operations(ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION);
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service(
|
||||
_data->primary_ctx, ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION, nullptr, 0,
|
||||
_data->memory_alloc_buffer));
|
||||
}
|
||||
#endif
|
||||
|
||||
if(!_counter_events.empty())
|
||||
{
|
||||
for(const auto& itr : _data->gpu_agents)
|
||||
{
|
||||
const auto& _agent_id = rocprofiler_agent_id_t{ itr.agent->handle };
|
||||
_data->agent_events.emplace(
|
||||
itr.agent->id,
|
||||
create_agent_profile(itr.agent->id, _counter_events, _data));
|
||||
_agent_id, create_agent_profile(_agent_id, _counter_events, _data));
|
||||
}
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&_data->counter_ctx));
|
||||
|
||||
auto _operations = std::array<rocprofiler_tracing_operation_t, 1>{
|
||||
ROCPROFILER_KERNEL_DISPATCH_COMPLETE
|
||||
ROCPROFILER_KERNEL_DISPATCH_COMPLETE,
|
||||
};
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
|
||||
@@ -1393,8 +1835,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
|
||||
}
|
||||
}
|
||||
|
||||
// throwaway context for handling the profiler control API. If primary_ctx were used,
|
||||
// we would get profiler pause callback but never get profiler resume callback
|
||||
// throwaway context for handling the profiler control API. If primary_ctx were
|
||||
// used, we would get profiler pause callback but never get profiler resume
|
||||
// callback
|
||||
{
|
||||
auto _local_ctx = rocprofiler_context_id_t{ 0 };
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&_local_ctx));
|
||||
@@ -1405,8 +1848,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
|
||||
|
||||
if(!is_valid(_data->primary_ctx))
|
||||
{
|
||||
// notify rocprofiler that initialization failed and all the contexts, buffers,
|
||||
// etc. created should be ignored
|
||||
// notify rocprofiler that initialization failed and all the contexts,
|
||||
// buffers, etc. created should be ignored
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -1454,13 +1897,7 @@ tool_fini(void* callback_data)
|
||||
|
||||
void
|
||||
setup()
|
||||
{
|
||||
if(int status = 0;
|
||||
rocprofiler_is_initialized(&status) == ROCPROFILER_STATUS_SUCCESS && status == 0)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_force_configure(&rocprofiler_configure));
|
||||
}
|
||||
}
|
||||
{}
|
||||
|
||||
void
|
||||
shutdown()
|
||||
|
||||
+27
-49
@@ -91,15 +91,17 @@ get_agent_counter_info(const tool_agent_vec_t& _agents)
|
||||
|
||||
for(auto itr : _agents)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
|
||||
itr.agent->id, counters_supported_callback, &_data));
|
||||
const auto& _agent_id = rocprofiler_agent_id_t{ itr.agent->handle };
|
||||
|
||||
std::sort(_data.at(itr.agent->id).begin(), _data.at(itr.agent->id).end(),
|
||||
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
|
||||
_agent_id, counters_supported_callback, &_data));
|
||||
|
||||
std::sort(_data.at(_agent_id).begin(), _data.at(_agent_id).end(),
|
||||
[](const auto& lhs, const auto& rhs) {
|
||||
return (lhs.id.handle < rhs.id.handle);
|
||||
});
|
||||
|
||||
for(auto& citr : _data.at(itr.agent->id))
|
||||
for(auto& citr : _data.at(_agent_id))
|
||||
{
|
||||
std::sort(citr.dimension_info.begin(), citr.dimension_info.end(),
|
||||
[](const auto& lhs, const auto& rhs) { return (lhs.id < rhs.id); });
|
||||
@@ -123,38 +125,16 @@ client_data::initialize()
|
||||
buffered_tracing_info = rocprofiler::sdk::get_buffer_tracing_names();
|
||||
callback_tracing_info = rocprofiler::sdk::get_callback_tracing_names();
|
||||
|
||||
static constexpr auto supported_agent_info_version = ROCPROFILER_AGENT_INFO_VERSION_0;
|
||||
|
||||
rocprofiler_query_available_agents_cb_t iterate_cb =
|
||||
[](rocprofiler_agent_version_t version, const void** agents_arr,
|
||||
size_t num_agents, void* user_data) {
|
||||
ROCPROFSYS_CONDITIONAL_ABORT(version != supported_agent_info_version,
|
||||
"rocprofiler agent info version != expected "
|
||||
"agent info version (=%i). value: %i\n",
|
||||
supported_agent_info_version, version);
|
||||
|
||||
auto _agents_v = std::vector<rocprofiler_agent_v0_t>{};
|
||||
for(size_t i = 0; i < num_agents; ++i)
|
||||
{
|
||||
const auto* _agent =
|
||||
static_cast<const rocprofiler_agent_v0_t*>(agents_arr[i]);
|
||||
_agents_v.emplace_back(*_agent);
|
||||
}
|
||||
|
||||
auto* tool_data_v = as_client_data(user_data);
|
||||
tool_data_v->set_agents(std::move(_agents_v));
|
||||
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
};
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_query_available_agents(
|
||||
supported_agent_info_version, iterate_cb, sizeof(rocprofiler_agent_t), this));
|
||||
set_agents();
|
||||
}
|
||||
|
||||
void
|
||||
client_data::initialize_event_info()
|
||||
{
|
||||
if(agents.empty()) initialize();
|
||||
if(agent_manager::get_instance().get_agents().empty())
|
||||
{
|
||||
initialize();
|
||||
}
|
||||
|
||||
if(agent_counter_info.size() != gpu_agents.size())
|
||||
agent_counter_info = get_agent_counter_info(gpu_agents);
|
||||
@@ -166,14 +146,15 @@ client_data::initialize_event_info()
|
||||
|
||||
for(const auto& aitr : gpu_agents)
|
||||
{
|
||||
auto _dev_index = aitr.device_id;
|
||||
auto _device_qualifier_sym = JOIN("", ":device=", _dev_index);
|
||||
auto _device_qualifier =
|
||||
auto _dev_index = aitr.device_id;
|
||||
const auto& _agent_id = rocprofiler_agent_id_t{ aitr.agent->handle };
|
||||
auto _device_qualifier_sym = JOIN("", ":device=", _dev_index);
|
||||
auto _device_qualifier =
|
||||
tim::hardware_counters::qualifier{ true, static_cast<int>(_dev_index),
|
||||
_device_qualifier_sym,
|
||||
JOIN(" ", "Device", _dev_index) };
|
||||
|
||||
auto _counter_info = agent_counter_info.at(aitr.agent->id);
|
||||
auto _counter_info = agent_counter_info.at(_agent_id);
|
||||
std::sort(_counter_info.begin(), _counter_info.end(),
|
||||
[](const rocprofiler_tool_counter_info_t& lhs,
|
||||
const rocprofiler_tool_counter_info_t& rhs) {
|
||||
@@ -248,23 +229,20 @@ client_data::initialize_event_info()
|
||||
}
|
||||
|
||||
void
|
||||
client_data::set_agents(agent_vec_t&& _agents_v)
|
||||
client_data::set_agents()
|
||||
{
|
||||
agents = std::move(_agents_v);
|
||||
auto& agent_mngr = agent_manager::get_instance();
|
||||
|
||||
std::sort(agents.begin(), agents.end(),
|
||||
[](const auto& lhs, const auto& rhs) { return lhs.node_id < rhs.node_id; });
|
||||
auto fill_agents = [&](agent_type type, std::vector<tool_agent>& out) {
|
||||
const auto& _agents = agent_mngr.get_agents_by_type(type);
|
||||
for(const auto& agent : _agents)
|
||||
{
|
||||
out.emplace_back(tool_agent{ agent->device_type_index, agent.get() });
|
||||
}
|
||||
};
|
||||
|
||||
cpu_agents.clear();
|
||||
gpu_agents.clear();
|
||||
|
||||
for(const auto& itr : agents)
|
||||
{
|
||||
if(itr.type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
cpu_agents.emplace_back(tool_agent{ cpu_agents.size(), &itr });
|
||||
else if(itr.type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
gpu_agents.emplace_back(tool_agent{ gpu_agents.size(), &itr });
|
||||
}
|
||||
fill_agents(agent_type::GPU, gpu_agents);
|
||||
fill_agents(agent_type::CPU, cpu_agents);
|
||||
}
|
||||
} // namespace rocprofiler_sdk
|
||||
} // namespace rocprofsys
|
||||
|
||||
+45
-11
@@ -23,6 +23,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "common/synchronized.hpp"
|
||||
#include "core/agent_manager.hpp"
|
||||
#include "core/timemory.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/agent.h>
|
||||
@@ -50,6 +51,8 @@ using kernel_symbol_map_t =
|
||||
std::unordered_map<rocprofiler_kernel_id_t, kernel_symbol_data_t>;
|
||||
using callback_arg_array_t = std::vector<std::pair<std::string, std::string>>;
|
||||
|
||||
using rocprofsys_agent_t = agent;
|
||||
|
||||
struct code_object_callback_record_t
|
||||
{
|
||||
uint64_t timestamp = 0;
|
||||
@@ -88,8 +91,8 @@ struct rocprofiler_tool_counter_info_t : rocprofiler_counter_info_v0_t
|
||||
|
||||
struct tool_agent
|
||||
{
|
||||
uint64_t device_id = 0;
|
||||
const rocprofiler_agent_v0_t* agent = nullptr;
|
||||
uint64_t device_id = 0;
|
||||
const rocprofsys_agent_t* agent = nullptr;
|
||||
};
|
||||
|
||||
struct timing_interval
|
||||
@@ -98,6 +101,16 @@ struct timing_interval
|
||||
rocprofiler_timestamp_t end = 0;
|
||||
};
|
||||
|
||||
struct argument_info
|
||||
{
|
||||
uint32_t arg_number = 0;
|
||||
std::string arg_type = {};
|
||||
std::string arg_name = {};
|
||||
std::string arg_value = {};
|
||||
};
|
||||
|
||||
using function_args_t = std::vector<argument_info>;
|
||||
|
||||
using agent_counter_info_map_t =
|
||||
std::unordered_map<rocprofiler_agent_id_t,
|
||||
std::vector<rocprofiler_tool_counter_info_t>>;
|
||||
@@ -117,7 +130,7 @@ using backtrace_operation_map_t =
|
||||
|
||||
struct client_data
|
||||
{
|
||||
static constexpr size_t num_buffers = 3;
|
||||
static constexpr size_t num_buffers = 4;
|
||||
static constexpr size_t num_contexts = 2;
|
||||
|
||||
using buffer_name_info_t = rocprofiler::sdk::buffer_name_info_t<std::string_view>;
|
||||
@@ -134,8 +147,8 @@ struct client_data
|
||||
rocprofiler_context_id_t counter_ctx = { 0 };
|
||||
rocprofiler_buffer_id_t kernel_dispatch_buffer = { 0 };
|
||||
rocprofiler_buffer_id_t memory_copy_buffer = { 0 };
|
||||
rocprofiler_buffer_id_t memory_alloc_buffer = { 0 };
|
||||
rocprofiler_buffer_id_t counter_collection_buffer = { 0 };
|
||||
std::vector<rocprofiler_agent_v0_t> agents = {};
|
||||
std::vector<tool_agent> cpu_agents = {};
|
||||
std::vector<tool_agent> gpu_agents = {};
|
||||
std::vector<hardware_counter_info> events_info = {};
|
||||
@@ -150,14 +163,16 @@ struct client_data
|
||||
|
||||
void initialize();
|
||||
void initialize_event_info();
|
||||
void set_agents(agent_vec_t&& agents);
|
||||
void set_agents();
|
||||
context_id_vec_t get_contexts() const;
|
||||
buffer_id_vec_t get_buffers() const;
|
||||
const rocprofiler_agent_t* get_agent(rocprofiler_agent_id_t _id) const;
|
||||
const rocprofsys_agent_t* get_agent(rocprofiler_agent_id_t _id) const;
|
||||
const tool_agent* get_gpu_tool_agent(rocprofiler_agent_id_t id) const;
|
||||
const kernel_symbol_data_t* get_kernel_symbol_info(uint64_t _kernel_id) const;
|
||||
const rocprofiler_tool_counter_info_t* get_tool_counter_info(
|
||||
rocprofiler_agent_id_t _agent_id, rocprofiler_counter_id_t _counter_id) const;
|
||||
const rocprofiler_callback_tracing_code_object_load_data_t* get_code_object_info(
|
||||
uint64_t code_object_id) const;
|
||||
};
|
||||
|
||||
inline client_data::context_id_vec_t
|
||||
@@ -175,23 +190,24 @@ client_data::get_buffers() const
|
||||
return buffer_id_vec_t{
|
||||
kernel_dispatch_buffer,
|
||||
memory_copy_buffer,
|
||||
memory_alloc_buffer,
|
||||
counter_collection_buffer,
|
||||
};
|
||||
}
|
||||
|
||||
inline const rocprofiler_agent_t*
|
||||
inline const rocprofsys_agent_t*
|
||||
client_data::get_agent(rocprofiler_agent_id_t _id) const
|
||||
{
|
||||
for(const auto& itr : agents)
|
||||
if(itr.id == _id) return &itr;
|
||||
return nullptr;
|
||||
const auto& agent = agent_manager::get_instance().get_agent_by_handle(_id.handle);
|
||||
|
||||
return &agent;
|
||||
}
|
||||
|
||||
inline const tool_agent*
|
||||
client_data::get_gpu_tool_agent(rocprofiler_agent_id_t id) const
|
||||
{
|
||||
for(const auto& itr : gpu_agents)
|
||||
if(id == itr.agent->id) return &itr;
|
||||
if(id.handle == itr.agent->handle) return &itr;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -223,6 +239,24 @@ client_data::get_tool_counter_info(rocprofiler_agent_id_t _agent_id,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
inline const rocprofiler_callback_tracing_code_object_load_data_t*
|
||||
client_data::get_code_object_info(uint64_t code_object_id) const
|
||||
{
|
||||
return code_object_records.rlock(
|
||||
[code_object_id](const auto& _data)
|
||||
-> const rocprofiler_callback_tracing_code_object_load_data_t* {
|
||||
for(const auto& itr : _data)
|
||||
{
|
||||
if(code_object_id == itr.payload.code_object_id)
|
||||
{
|
||||
return &itr.payload;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
});
|
||||
}
|
||||
|
||||
inline constexpr client_data*
|
||||
as_client_data(void* _ptr)
|
||||
{
|
||||
|
||||
@@ -31,13 +31,13 @@
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "core/rocpd/json.hpp"
|
||||
#include "core/state.hpp"
|
||||
#include "core/trace_cache/cache_manager.hpp"
|
||||
#include "core/utility.hpp"
|
||||
#include "library/components/backtrace.hpp"
|
||||
#include "library/components/backtrace_metrics.hpp"
|
||||
#include "library/components/backtrace_timestamp.hpp"
|
||||
#include "library/components/callchain.hpp"
|
||||
#include "library/perf.hpp"
|
||||
#include "library/ptl.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
#include "library/thread_data.hpp"
|
||||
#include "library/thread_info.hpp"
|
||||
@@ -59,6 +59,7 @@
|
||||
#include <timemory/mpl/quirks.hpp>
|
||||
#include <timemory/mpl/type_traits.hpp>
|
||||
#include <timemory/operations.hpp>
|
||||
#include <timemory/process/threading.hpp>
|
||||
#include <timemory/sampling/allocator.hpp>
|
||||
#include <timemory/sampling/overflow.hpp>
|
||||
#include <timemory/sampling/sampler.hpp>
|
||||
@@ -83,6 +84,7 @@
|
||||
#include <regex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <type_traits>
|
||||
|
||||
#include <pthread.h>
|
||||
@@ -227,46 +229,65 @@ get_data_processor()
|
||||
return rocpd::data_processor::get_instance();
|
||||
}
|
||||
|
||||
template <typename Category>
|
||||
std::string
|
||||
get_track_name(const thread_info& _thread_info)
|
||||
{
|
||||
size_t thread_id = _thread_info.index_data->system_value;
|
||||
size_t sequent_value = _thread_info.index_data->sequent_value;
|
||||
constexpr auto sample_type =
|
||||
std::is_same_v<Category, category::timer_sampling> ? "Timer" : "Overflow";
|
||||
return JOIN(" ", "Thread", sequent_value, sample_type, "(S)", thread_id);
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_initialize_sampling_category()
|
||||
{
|
||||
static bool _is_initialized = false;
|
||||
if(_is_initialized) return;
|
||||
|
||||
auto& data_processor = get_data_processor();
|
||||
data_processor.insert_category(ROCPROFSYS_CATEGORY_SAMPLING,
|
||||
trait::name<category::sampling>::value);
|
||||
data_processor.insert_category(ROCPROFSYS_CATEGORY_OVERFLOW_SAMPLING,
|
||||
trait::name<category::overflow_sampling>::value);
|
||||
data_processor.insert_category(ROCPROFSYS_CATEGORY_TIMER_SAMPLING,
|
||||
trait::name<category::timer_sampling>::value);
|
||||
trace_cache::get_metadata_registry().add_string(
|
||||
trait::name<category::sampling>::value);
|
||||
trace_cache::get_metadata_registry().add_string(
|
||||
trait::name<category::overflow_sampling>::value);
|
||||
trace_cache::get_metadata_registry().add_string(
|
||||
trait::name<category::timer_sampling>::value);
|
||||
|
||||
_is_initialized = true;
|
||||
}
|
||||
|
||||
size_t
|
||||
void
|
||||
rocpd_initialize_thread_info(size_t tid)
|
||||
{
|
||||
const auto& _thread_info = thread_info::get(tid, SequentTID);
|
||||
ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", tid);
|
||||
if(!_thread_info) return -1;
|
||||
if(!_thread_info) return;
|
||||
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
|
||||
return data_processor.insert_thread_info(
|
||||
n_info.id, getppid(), getpid(), _thread_info->index_data->system_value,
|
||||
threading::get_thread_name().c_str(), _thread_info->get_start(),
|
||||
_thread_info->get_stop(), "{}");
|
||||
trace_cache::get_metadata_registry().add_thread_info(
|
||||
{ getppid(), getpid(),
|
||||
static_cast<size_t>(_thread_info->index_data->system_value),
|
||||
static_cast<uint32_t>(_thread_info->get_start()),
|
||||
static_cast<uint32_t>(_thread_info->get_stop()), "{}" });
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_init_track(const char* track_name, int64_t tid)
|
||||
rocpd_init_track(int64_t tid)
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
const auto& _thread_info = thread_info::get(tid, SequentTID);
|
||||
ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", tid);
|
||||
if(!_thread_info) return;
|
||||
|
||||
data_processor.insert_track(track_name, n_info.id, getpid(), tid, "{}");
|
||||
size_t thread_id = _thread_info->index_data->system_value;
|
||||
|
||||
const auto& _timer_track_name =
|
||||
get_track_name<category::timer_sampling>(*_thread_info);
|
||||
const auto& _overflow_track_name =
|
||||
get_track_name<category::overflow_sampling>(*_thread_info);
|
||||
|
||||
trace_cache::get_metadata_registry().add_track(
|
||||
{ _timer_track_name, thread_id, "{}" });
|
||||
trace_cache::get_metadata_registry().add_track(
|
||||
{ _overflow_track_name, thread_id, "{}" });
|
||||
}
|
||||
|
||||
template <typename Category>
|
||||
@@ -275,11 +296,12 @@ rocpd_insert_region(size_t thread_id, size_t start_time, size_t end_time, size_t
|
||||
const char* track, const char* call_stack = "{}",
|
||||
const char* line_info = "{}", const char* extdata = "{}")
|
||||
{
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto& data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto string_primary_key = data_processor.insert_string(trait::name<Category>::value);
|
||||
|
||||
auto event_id = data_processor.insert_event(category_enum_id<Category>::value, 0, 0,
|
||||
0, call_stack, line_info, extdata);
|
||||
auto event_id = data_processor.insert_event(string_primary_key, 0, 0, 0, call_stack,
|
||||
line_info, extdata);
|
||||
|
||||
data_processor.insert_region(n_info.id, getpid(), thread_id, start_time, end_time,
|
||||
name_id, event_id);
|
||||
@@ -849,6 +871,9 @@ configure(bool _setup, int64_t _tid)
|
||||
}
|
||||
}
|
||||
}
|
||||
rocpd_initialize_sampling_category();
|
||||
rocpd_initialize_thread_info(_tid);
|
||||
rocpd_init_track(_tid);
|
||||
|
||||
*_running = true;
|
||||
sampling::get_sampler_init(_tid)->sample();
|
||||
@@ -1403,7 +1428,7 @@ post_process_perfetto(int64_t _tid, const std::vector<timer_sampling_data>& _tim
|
||||
{
|
||||
auto _ncur = _ncount++;
|
||||
// the begin/end + HW counters will be same for entire call-stack so only
|
||||
// annotate the top and the bottom functons to keep the data consumption
|
||||
// annotate the top and the bottom functions to keep the data consumption
|
||||
// low
|
||||
bool _include_common = (_ncur == 0 || _ncur + 1 == itr.m_stack.size());
|
||||
|
||||
@@ -1707,14 +1732,14 @@ rocpd_post_process_overflow_data(
|
||||
.first->c_str();
|
||||
auto main_name_id = data_processor.insert_string(_main_name);
|
||||
|
||||
const auto& _track_name =
|
||||
JOIN(" ", "Thread", _thread_info->index_data->sequent_value, "Overflow",
|
||||
"(S)", _thread_info->index_data->system_value);
|
||||
size_t thread_id = _thread_info->index_data->system_value;
|
||||
|
||||
auto thread_primary_key = data_processor.map_thread_id_to_primary_key(thread_id);
|
||||
const auto _track_name =
|
||||
get_track_name<category::overflow_sampling>(*_thread_info);
|
||||
|
||||
auto thread_idx = rocpd_initialize_thread_info(_tid);
|
||||
rocpd_init_track(_track_name.c_str(), thread_idx);
|
||||
rocpd_insert_region<category::overflow_sampling>(
|
||||
thread_idx, _beg_ns, _end_ns, main_name_id, _track_name.c_str());
|
||||
thread_primary_key, _beg_ns, _end_ns, main_name_id, _track_name.c_str());
|
||||
|
||||
for(const auto& itr : _overflow_data)
|
||||
{
|
||||
@@ -1729,7 +1754,7 @@ rocpd_post_process_overflow_data(
|
||||
static_strings.emplace(demangle(iitr.name)).first->c_str();
|
||||
auto name_id = data_processor.insert_string(_name);
|
||||
rocpd_insert_region<category::overflow_sampling>(
|
||||
thread_idx, _beg, _end, name_id, _track_name.c_str(),
|
||||
thread_primary_key, _beg, _end, name_id, _track_name.c_str(),
|
||||
generate_call_stack_json(iitr).c_str(),
|
||||
generate_line_info_json(iitr).c_str());
|
||||
}
|
||||
@@ -1738,9 +1763,11 @@ rocpd_post_process_overflow_data(
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_post_process_backtrace_metrics(int64_t _tid,
|
||||
const std::vector<timer_sampling_data>& _timer_data)
|
||||
rocpd_post_process_backtrace_metrics(
|
||||
[[maybe_unused]] int64_t _tid,
|
||||
[[maybe_unused]] const std::vector<timer_sampling_data>& _timer_data)
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto _valid_metrics = backtrace_metrics::valid_array_t{};
|
||||
|
||||
for(const auto& itr : _timer_data)
|
||||
@@ -1752,17 +1779,20 @@ rocpd_post_process_backtrace_metrics(int64_t _ti
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(3 || get_debug_sampling(),
|
||||
"[%li] Post-processing metrics for rocpd...\n", _tid);
|
||||
backtrace_metrics::init_rocpd(_tid, _valid_metrics);
|
||||
backtrace_metrics::init_rocpd(_tid, _valid_metrics); // move to setup
|
||||
for(const auto& itr : _timer_data)
|
||||
itr.m_metrics.post_process_rocpd(_tid, 0.5 * (itr.m_beg + itr.m_end));
|
||||
backtrace_metrics::fini_rocpd(_tid, _valid_metrics);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_post_process_timer_data(int64_t _tid,
|
||||
const std::vector<timer_sampling_data>& _timer_data)
|
||||
rocpd_post_process_timer_data(
|
||||
[[maybe_unused]] int64_t _tid,
|
||||
[[maybe_unused]] const std::vector<timer_sampling_data>& _timer_data)
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto& data_processor = get_data_processor();
|
||||
|
||||
const auto& _thread_info = thread_info::get(_tid, SequentTID);
|
||||
@@ -1776,15 +1806,14 @@ rocpd_post_process_timer_data(int64_t _tid,
|
||||
auto _beg_ns = std::max(_timer_data.front().m_beg, _thread_info->get_start());
|
||||
auto _end_ns = std::min(_timer_data.back().m_end, _thread_info->get_stop());
|
||||
|
||||
const auto _track_name =
|
||||
JOIN(" ", "Thread", _thread_info->index_data->sequent_value, "(S)",
|
||||
_thread_info->index_data->system_value);
|
||||
const auto _track_name = get_track_name<category::timer_sampling>(*_thread_info);
|
||||
|
||||
auto thread_primary_key = data_processor.map_thread_id_to_primary_key(
|
||||
_thread_info->index_data->system_value);
|
||||
|
||||
auto thread_idx = rocpd_initialize_thread_info(_tid);
|
||||
rocpd_init_track(_track_name.c_str(), thread_idx);
|
||||
const auto main_name_id = data_processor.insert_string("samples [rocprof-sys]");
|
||||
rocpd_insert_region<category::timer_sampling>(thread_idx, _beg_ns, _end_ns,
|
||||
main_name_id, _track_name.c_str());
|
||||
rocpd_insert_region<category::timer_sampling>(
|
||||
thread_primary_key, _beg_ns, _end_ns, main_name_id, _track_name.c_str());
|
||||
|
||||
auto _labels = backtrace_metrics::get_hw_counter_labels(_tid);
|
||||
for(const auto& itr : _timer_data)
|
||||
@@ -1798,7 +1827,7 @@ rocpd_post_process_timer_data(int64_t _tid,
|
||||
{
|
||||
auto _ncur = _ncount++;
|
||||
// the begin/end + HW counters will be same for entire call-stack so only
|
||||
// annotate the top and the bottom functons to keep the data consumption
|
||||
// annotate the top and the bottom functions to keep the data consumption
|
||||
// low
|
||||
bool _include_common = (_ncur == 0 || _ncur + 1 == itr.m_stack.size());
|
||||
|
||||
@@ -1832,8 +1861,9 @@ rocpd_post_process_timer_data(int64_t _tid,
|
||||
inlined_call_stack->set("inlined", "true");
|
||||
|
||||
rocpd_insert_region<category::timer_sampling>(
|
||||
thread_idx, _beg, _end, inlined_name_id, _track_name.c_str(),
|
||||
inlined_call_stack->to_string().c_str(), "{}",
|
||||
thread_primary_key, _beg, _end, inlined_name_id,
|
||||
_track_name.c_str(), inlined_call_stack->to_string().c_str(),
|
||||
"{}",
|
||||
// Only include HW counters for first inlined function
|
||||
(_n == 0) ? hw_counter_json.c_str() : "{}");
|
||||
}
|
||||
@@ -1843,22 +1873,28 @@ rocpd_post_process_timer_data(int64_t _tid,
|
||||
const auto* _name = static_strings.emplace(iitr.name).first->c_str();
|
||||
const auto name_id = data_processor.insert_string(_name);
|
||||
rocpd_insert_region<category::timer_sampling>(
|
||||
thread_idx, _beg, _end, name_id, _track_name.c_str(),
|
||||
thread_primary_key, _beg, _end, name_id, _track_name.c_str(),
|
||||
generate_call_stack_json(iitr).c_str(),
|
||||
generate_line_info_json(iitr).c_str(), hw_counter_json.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
post_process_rocpd(int64_t _tid, const std::vector<timer_sampling_data>& _timer_data,
|
||||
const std::vector<overflow_sampling_data>& _overflow_data)
|
||||
{
|
||||
rocpd_initialize_sampling_category();
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
rocpd_post_process_overflow_data(_tid, _overflow_data);
|
||||
rocpd_post_process_timer_data(_tid, _timer_data);
|
||||
#else
|
||||
(void) _tid;
|
||||
(void) _timer_data;
|
||||
(void) _overflow_data;
|
||||
#endif
|
||||
}
|
||||
|
||||
struct sampling_initialization
|
||||
|
||||
Ссылка в новой задаче
Block a user