* Rocpd part 2, caching

* Fix shadowed variables

* backward compatibility

* Fixed designated initializers

* Fix timemory include

* Remove benchmark & Fix build issues for rhel

* Add missing bracket

* Fix shadowing and pedantic

* Fix pedantic pt2

* Fix duplicated SDK calls

* Add decay in get_size_impl

* Rename sample cache to trace cache

* Add cache storage supported types

* Resolving track naming in sampling module

* fix sampling of flushing thread

* fix sampling of flushing thread 2

* throw exception upon store while buffer storage is not running

* Prevent fork crashing

* Fix rebase issue

* Applied suggestions from code review

* Change flushing thread to use PTL

* Fix agent creation order

* Fix stream id ci throw

* Remove force setup of rocprofiler-sdk

* Code cleanup

* Change initialization for agent

* Add missing namespace

* Fix the mismatch within the tool_agent->device_id

* Switch from using handle to use agent type index

* Fix pmc info comparator in metadata registry

---------

Co-authored-by: Aleksandar <aleksandar.djordjevic@amd.com>
Co-authored-by: Milan Radosavljevic <milan.radosavljevic@amd.com>
Co-authored-by: Marjan Antic <marantic@amd.com>
Этот коммит содержится в:
systems-assistant[bot]
2025-08-19 22:01:04 -04:00
коммит произвёл GitHub
родитель 351d598869
Коммит 1f86010ca2
40 изменённых файлов: 3432 добавлений и 1184 удалений
-7
Просмотреть файл
@@ -224,9 +224,6 @@ rocprofiler_systems_add_option(ROCPROFSYS_INSTALL_PERFETTO_TOOLS
rocprofiler_systems_add_option(ROCPROFILER_BUILD_SQLITE3
"Enable building sqlite3 library internally" OFF
)
rocprofiler_systems_add_option(ROCPROFSYS_ENABLE_BENCHMARK
"Enable performance benchmarking capabilities for the project" OFF
)
if(ROCPROFSYS_USE_PAPI)
rocprofiler_systems_add_option(ROCPROFSYS_BUILD_PAPI "Build PAPI from submodule" ON)
@@ -334,10 +331,6 @@ if(ROCPROFSYS_BUILD_TESTING OR "$ENV{ROCPROFSYS_CI}" MATCHES "[1-9]+|ON|on|y|yes
include(CTest)
endif()
if(ROCPROFSYS_ENABLE_BENCHMARK)
add_compile_definitions(-DROCPROFSYS_USE_BENCHMARK=1)
endif()
# ------------------------------------------------------------------------------#
#
# library and executables
+29
Просмотреть файл
@@ -1,3 +1,25 @@
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/defines.hpp.in
@@ -25,6 +47,8 @@ set(core_sources
${CMAKE_CURRENT_LIST_DIR}/state.cpp
${CMAKE_CURRENT_LIST_DIR}/timemory.cpp
${CMAKE_CURRENT_LIST_DIR}/utility.cpp
${CMAKE_CURRENT_LIST_DIR}/agent_manager.cpp
${CMAKE_CURRENT_LIST_DIR}/node_info.cpp
)
set(core_headers
@@ -53,6 +77,9 @@ set(core_headers
${CMAKE_CURRENT_LIST_DIR}/state.hpp
${CMAKE_CURRENT_LIST_DIR}/timemory.hpp
${CMAKE_CURRENT_LIST_DIR}/utility.hpp
${CMAKE_CURRENT_LIST_DIR}/agent.hpp
${CMAKE_CURRENT_LIST_DIR}/agent_manager.hpp
${CMAKE_CURRENT_LIST_DIR}/node_info.hpp
)
add_library(rocprofiler-systems-core-library STATIC)
@@ -69,6 +96,7 @@ add_subdirectory(binary)
add_subdirectory(components)
add_subdirectory(containers)
add_subdirectory(rocpd)
add_subdirectory(trace_cache)
target_include_directories(
rocprofiler-systems-core-library
@@ -94,6 +122,7 @@ target_link_libraries(
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-compile-definitions>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-compile-options>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-perfetto>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-sqlite3>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-timemory>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-mpi>
$<BUILD_INTERFACE:rocprofiler-systems::rocprofiler-systems-rocm>
+3 -2
Просмотреть файл
@@ -43,7 +43,8 @@ enum class agent_type : uint8_t
struct agent
{
agent_type type;
uint64_t id;
uint64_t handle;
uint64_t device_id;
uint32_t node_id;
int32_t logical_node_id;
int32_t logical_node_type_id;
@@ -52,7 +53,7 @@ struct agent
std::string vendor_name;
std::string product_name;
size_t device_id{ 0 };
size_t device_type_index{ 0 };
size_t base_id{ 0 };
#if ROCPROFSYS_USE_ROCM > 0
amdsmi_processor_handle smi_handle = nullptr;
+22 -3
Просмотреть файл
@@ -44,11 +44,30 @@ agent_manager::insert_agent(agent& _agent)
(_agent.type == agent_type::GPU ? _gpu_agents_cnt : _cpu_agents_cnt),
(_agent.type == agent_type::GPU ? "GPU" : "CPU"));
_agent.device_id =
_agent.device_type_index =
(_agent.type == agent_type::GPU ? _gpu_agents_cnt++ : _cpu_agents_cnt++);
_agents.emplace_back(std::make_shared<agent>(_agent));
}
const agent&
agent_manager::get_agent_by_type_index(size_t type_index, agent_type type) const
{
ROCPROFSYS_VERBOSE(3, "Getting agent for type: %s, with type index: %ld\n",
(type == agent_type::GPU) ? "GPU" : "CPU", type_index);
auto _agent =
std::find_if(_agents.begin(), _agents.end(), [&](const auto& agent_ptr) {
return agent_ptr->type == type && agent_ptr->device_type_index == type_index;
});
if(_agent == _agents.end())
{
std::ostringstream oss;
oss << "Agent not found for type index: " << type_index
<< ", type: " << (type == agent_type::GPU ? "GPU" : "CPU");
throw std::out_of_range(oss.str());
}
return **_agent;
}
const agent&
agent_manager::get_agent_by_id(size_t device_id, agent_type type) const
{
@@ -75,7 +94,7 @@ agent_manager::get_agent_by_handle(uint64_t device_handle, agent_type type) cons
device_handle, (type == agent_type::GPU ? "GPU" : "CPU"));
auto _agent =
std::find_if(_agents.begin(), _agents.end(), [&](const auto& agent_ptr) {
return agent_ptr->type == type && agent_ptr->id == device_handle;
return agent_ptr->type == type && agent_ptr->handle == device_handle;
});
if(_agent == _agents.end())
{
@@ -93,7 +112,7 @@ agent_manager::get_agent_by_handle(size_t device_handle) const
ROCPROFSYS_VERBOSE(3, "Getting agent for device handle: %ld\n", device_handle);
auto _agent =
std::find_if(_agents.begin(), _agents.end(), [&](const auto& agent_ptr) {
return agent_ptr->id == device_handle;
return agent_ptr->handle == device_handle;
});
if(_agent == _agents.end())
{
+2 -1
Просмотреть файл
@@ -41,8 +41,9 @@ struct agent_manager
~agent_manager() = default;
void insert_agent(agent& agent);
const agent& get_agent_by_type_index(size_t type_index, agent_type type) const;
const agent& get_agent_by_id(size_t device_id, agent_type type) const;
const agent& get_agent_by_handle(size_t device_id, agent_type type) const;
const agent& get_agent_by_handle(size_t device_handle, agent_type type) const;
const agent& get_agent_by_handle(size_t device_handle) const;
std::vector<std::shared_ptr<agent>> get_agents_by_type(agent_type type) const;
-353
Просмотреть файл
@@ -1,353 +0,0 @@
// MIT License
//
// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <algorithm>
#include <array>
#include <bitset>
#include <chrono>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <limits>
#include <mutex>
#include <sstream>
#include <string>
#include <type_traits>
#include <unistd.h>
#include <unordered_map>
#include <vector>
#include "core/benchmark/category.hpp"
#include "core/debug.hpp"
namespace rocprofsys
{
namespace benchmark
{
namespace
{
template <bool enabled, typename category_enum, category_enum... enabled_categories>
struct benchmark_impl
{
template <category_enum... categories>
struct scope
{
scope(const scope&) = delete;
scope& operator=(const scope&) = delete;
~scope() = default;
protected:
scope() = default;
scope(scope&&) = default;
scope& operator=(scope&&) = default;
};
template <category_enum... categories>
static void start()
{}
template <category_enum... categories>
static void end()
{}
template <category_enum... categories>
[[nodiscard]] static scope<categories...> scoped_trace()
{
return scope<categories...>{};
}
static void init_from_env(const char* = nullptr) {}
static void show_results() {}
};
using tid_t = __pid_t;
struct indexed_category
{
size_t category;
tid_t thread_id;
friend bool operator==(const indexed_category& lhs, const indexed_category& rhs)
{
return lhs.category == rhs.category && lhs.thread_id == rhs.thread_id;
}
};
struct indexed_category_hash
{
size_t operator()(const indexed_category& p) const noexcept
{
std::size_t hash1 = std::hash<size_t>{}(p.category);
std::size_t hash2 = std::hash<size_t>{}(p.thread_id);
return hash1 ^ (hash2 << 1);
}
};
template <typename category_enum, category_enum... enabled_categories>
struct benchmark_impl<true, category_enum, enabled_categories...>
{
static_assert(std::is_enum_v<category_enum>, "category_enum must be an enum");
public:
using clock = std::chrono::high_resolution_clock;
using time_point = clock::time_point;
static constexpr size_t _max_categories = static_cast<size_t>(category_enum::count);
template <category_enum... categories>
struct scope
{
friend benchmark_impl;
public:
scope(const scope&) = delete;
scope& operator=(const scope&) = delete;
~scope() { end<categories...>(); }
protected:
scope() { start<categories...>(); }
scope(scope&&) = default;
scope& operator=(scope&&) = default;
};
template <category_enum... categories>
static void start()
{
static const thread_local auto _thread_id = gettid();
const auto now = clock::now();
std::lock_guard lock(m_mutex);
(..., (is_category_defined<categories>([&] {
if(m_enabled.test(to_index(categories)))
m_started[{ to_index(categories), _thread_id }] = now;
})));
}
template <category_enum... categories>
static void end()
{
static const thread_local auto _thread_id = getpid();
const auto _end_time = clock::now();
std::lock_guard lock(m_mutex);
(..., (is_category_defined<categories>([&] {
if(m_enabled.test(to_index(categories)))
end_category(_end_time, categories, _thread_id);
})));
}
template <category_enum... categories>
[[nodiscard]] static scope<categories...> scoped_trace()
{
return scope<categories...>{};
}
static void init_from_env(const char* envVar = "ROCPROFSYS_BENCHMARK_CATEGORIES")
{
std::lock_guard lock(m_mutex);
const auto* env = std::getenv(envVar);
if(env == nullptr || std::string(env).empty())
{
ROCPROFSYS_WARNING(1, "No BENCHMARK categories specified in environment "
"variable ROCPROFSYS_BENCHMARK_CATEGORIES.\n");
return;
}
std::string _str(env);
std::istringstream ss(_str);
std::string token;
while(std::getline(ss, token, ','))
{
token.erase(0, token.find_first_not_of(" \t"));
token.erase(token.find_last_not_of(" \t") + 1);
for(category_enum cat : compiledCategories)
{
if(to_string(cat) == token)
{
m_enabled.set(to_index(cat));
}
}
}
}
static void show_results()
{
std::lock_guard lock(m_mutex);
std::vector<std::pair<category_enum, result_data>> sorted;
for(category_enum cat : compiledCategories)
{
const auto& data = m_results[to_index(cat)];
if(data.count > 0)
{
sorted.emplace_back(cat, data);
}
}
std::sort(sorted.begin(), sorted.end(), [](const auto& a, const auto& b) {
return a.second.total_time > b.second.total_time;
});
constexpr uint32_t _category = 30;
constexpr uint32_t _calls = 8;
constexpr uint32_t _total = 12;
constexpr uint32_t _avg = 10;
constexpr uint32_t _min = 10;
constexpr uint32_t _max = 10;
std::cout << "\033[32m"
<< std::string(_category + _calls + _total + _avg + _min + _max, '=')
<< "\n";
std::cout << "Benchmark Results (Sorted by Total Time):\n";
std::cout << std::string(_category + _calls + _total + _avg + _min + _max, '-')
<< "\n";
std::cout << std::left << std::setw(_category) << "Category" << std::right
<< std::setw(_calls) << "Calls" << std::setw(_total) << "Total(ms)"
<< std::setw(_avg) << "Avg(us)" << std::setw(_min) << "Min(us)"
<< std::setw(_max) << "Max(us)" << "\n";
std::cout << std::string(_category + _calls + _total + _avg + _min + _max, '-')
<< "\n";
for(const auto& [cat, data] : sorted)
{
double totalMs = static_cast<double>(data.total_time) / 1000.0;
double avgUs = static_cast<double>(data.total_time) / data.count;
std::cout << std::left << std::setw(_category) << to_string(cat) << std::right
<< std::setw(_calls) << data.count << std::setw(_total)
<< std::fixed << std::setprecision(3) << totalMs << std::setw(_avg)
<< std::fixed << std::setprecision(1) << avgUs << std::setw(_min)
<< data.min_time << std::setw(_max) << data.max_time << "\n";
}
std::cout << std::string(_category + _calls + _total + _avg + _min + _max, '=')
<< "\033[0m" << "\n\n";
}
private:
struct result_data
{
uint64_t total_time = 0;
size_t count = 0;
uint64_t min_time = std::numeric_limits<uint64_t>::max();
uint64_t max_time = std::numeric_limits<uint64_t>::min();
void update(uint64_t duration)
{
total_time += duration;
count += 1;
if(duration < min_time) min_time = duration;
if(duration > max_time) max_time = duration;
}
};
static constexpr size_t to_index(category_enum cat)
{
return static_cast<size_t>(cat);
}
static void end_category(const time_point& end_time, category_enum cat,
const tid_t thread_id)
{
const size_t _idx = to_index(cat);
auto _it = m_started.find({ _idx, thread_id });
if(_it == m_started.end())
{
ROCPROFSYS_WARNING(1, "Benchmark error: missing start time for category!\n");
return;
}
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end_time - _it->second)
.count();
m_started.erase(_it);
m_results[_idx].update(duration);
}
template <category_enum Cat, typename Func>
static constexpr void is_category_defined(Func&& f)
{
if constexpr(((Cat == enabled_categories) || ...))
{
f();
}
}
static constexpr std::array<category_enum, sizeof...(enabled_categories)>
compiledCategories = { enabled_categories... };
static inline std::unordered_map<indexed_category, time_point, indexed_category_hash>
m_started;
static inline std::array<result_data, _max_categories> m_results{};
static inline std::bitset<_max_categories> m_enabled;
static inline std::mutex m_mutex;
};
#ifdef ROCPROFSYS_ENABLE_BENCHMARK
using _benchmark_impl = benchmark::benchmark_impl<
static_cast<bool>(ROCPROFSYS_ENABLE_BENCHMARK), benchmark::category,
benchmark::category::kernel_dispatch, benchmark::category::memory_copy,
benchmark::category::memory_allocate, benchmark::category::db_entry_kernel_dispatch,
benchmark::category::db_entry_memory_copy,
benchmark::category::db_entry_memory_allocate,
benchmark::category::perfetto_kernel_dispatch,
benchmark::category::sdk_tool_buffered_tracing>;
#else
using _benchmark_impl = benchmark::benchmark_impl<false, benchmark::category>;
#endif
} // namespace
template <category... categories>
void
start()
{
_benchmark_impl::template start<categories...>();
}
template <category... categories>
void
end()
{
_benchmark_impl::template end<categories...>();
}
template <category... categories>
[[nodiscard]] auto
scoped_trace()
{
return _benchmark_impl::template scoped_trace<categories...>();
}
inline void
init_from_env(const char* envVar = "BENCHMARK_CATEGORIES")
{
_benchmark_impl::init_from_env(envVar);
}
inline void
show_results()
{
_benchmark_impl::show_results();
}
} // namespace benchmark
} // namespace rocprofsys
-68
Просмотреть файл
@@ -1,68 +0,0 @@
// Copyright (c) 2018-2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// with the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
//
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in the
// documentation and/or other materials provided with the distribution.
//
// * Neither the names of Advanced Micro Devices, Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this Software without specific prior written permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
#pragma once
#include <string_view>
namespace rocprofsys
{
namespace benchmark
{
enum class category
{
kernel_dispatch,
db_entry_kernel_dispatch,
memory_copy,
db_entry_memory_copy,
memory_allocate,
db_entry_memory_allocate,
perfetto_kernel_dispatch,
sdk_tool_buffered_tracing,
count
};
constexpr std::string_view
to_string(category cat)
{
switch(cat)
{
case category::kernel_dispatch: return "kernel_dispatch";
case category::db_entry_kernel_dispatch: return "db_entry_kernel_dispatch";
case category::memory_copy: return "memory_copy";
case category::memory_allocate: return "memory_allocate";
case category::db_entry_memory_copy: return "db_entry_memory_copy";
case category::db_entry_memory_allocate: return "db_entry_memory_allocate";
case category::perfetto_kernel_dispatch: return "perfetto_kernel_dispatch";
case category::sdk_tool_buffered_tracing: return "sdk_tool_buffered_tracing";
default: return "unknown";
}
}
} // namespace benchmark
} // namespace rocprofsys
+2
Просмотреть файл
@@ -96,6 +96,7 @@ ROCPROFSYS_DEFINE_CATEGORY(category, rocm_hip_api, ROCPROFSYS_CATEGORY_ROCM_HIP_
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_hsa_api, ROCPROFSYS_CATEGORY_ROCM_HSA_API, "rocm_hsa_api", "ROCm HSA functions")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_kernel_dispatch, ROCPROFSYS_CATEGORY_ROCM_KERNEL_DISPATCH, "rocm_kernel_dispatch", "ROCm Kernel dispatch")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_memory_copy, ROCPROFSYS_CATEGORY_ROCM_MEMORY_COPY, "rocm_memory_copy", "ROCm Async Memory Copy")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_memory_allocate, ROCPROFSYS_CATEGORY_ROCM_MEMORY_ALLOCATE, "rocm_memory_allocate", "ROCm Memory Allocations")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_hip_stream, ROCPROFSYS_CATEGORY_ROCM_HIP_STREAM, "rocm_hip_stream", "ROCm HIP Stream")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_scratch_memory, ROCPROFSYS_CATEGORY_ROCM_SCRATCH_MEMORY, "rocm_scratch_memory", "ROCm kernel scratch memory reallocations")
ROCPROFSYS_DEFINE_CATEGORY(category, rocm_page_migration, ROCPROFSYS_CATEGORY_ROCM_PAGE_MIGRATION, "rocm_page_migration", "ROCm memory page migration")
@@ -167,6 +168,7 @@ using name = perfetto_category<Tp...>;
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_hsa_api), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_kernel_dispatch), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_memory_copy), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_memory_allocate), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_hip_stream), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_scratch_memory), \
ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_page_migration), \
+23
Просмотреть файл
@@ -1,3 +1,25 @@
// MIT License
//
// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "cpu.hpp"
#include "agent_manager.hpp"
@@ -153,6 +175,7 @@ query_cpu_agents()
auto logical_id = id_count++;
auto id = cpu_count++;
auto cur_agent = agent{ agent_type::CPU,
0,
id,
node_id,
logical_id,
+13 -12
Просмотреть файл
@@ -124,19 +124,20 @@ query_rocm_agents()
auto& _agent_manager = agent_manager::get_instance();
for(size_t i = 0; i < num_agents; ++i)
{
const auto* _agent = static_cast<const rocprofiler_agent_v0_t*>(agents[i]);
auto cur_agent = agent{
const auto* _agent = static_cast<const rocprofiler_agent_v0_t*>(agents[i]);
agent cur_agent;
cur_agent.type =
(_agent->type == ROCPROFILER_AGENT_TYPE_GPU ? agent_type::GPU
: agent_type::CPU),
_agent->device_id,
_agent->node_id,
_agent->logical_node_id,
_agent->logical_node_type_id,
std::string(_agent->name),
std::string(_agent->vendor_name),
std::string(_agent->product_name),
std::string(_agent->model_name),
};
: agent_type::CPU);
cur_agent.handle = _agent->id.handle;
cur_agent.device_id = _agent->device_id;
cur_agent.node_id = _agent->node_id;
cur_agent.logical_node_id = _agent->logical_node_id;
cur_agent.logical_node_type_id = _agent->logical_node_type_id;
cur_agent.name = std::string(_agent->name);
cur_agent.model_name = std::string(_agent->model_name);
cur_agent.vendor_name = std::string(_agent->vendor_name);
cur_agent.product_name = std::string(_agent->product_name);
_agent_manager.insert_agent(cur_agent);
}
return ROCPROFILER_STATUS_SUCCESS;
+22
Просмотреть файл
@@ -1,3 +1,25 @@
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
set(rocpd_sources
${CMAKE_CURRENT_LIST_DIR}/data_processor.cpp
${CMAKE_CURRENT_LIST_DIR}/json.cpp
+18 -81
Просмотреть файл
@@ -69,8 +69,7 @@ data_processor::initialize_metadata()
size_t
data_processor::insert_string(const char* str)
{
std::lock_guard<std::mutex> lock(_data_mutex);
auto it = _string_map.find(str);
auto it = _string_map.find(str);
if(it != _string_map.end()) return _string_map.at(str);
data_storage::queries::table_insert_query query;
@@ -242,24 +241,12 @@ data_processor::insert_sample(const char* track, uint64_t timestamp, size_t even
}
size_t
data_processor::insert_event(size_t category_id, size_t stack_id, size_t parent_stack_id,
size_t correlation_id, const char* call_stack,
const char* line_info, const char* extdata)
data_processor::insert_event(size_t string_primary_key, size_t stack_id,
size_t parent_stack_id, size_t correlation_id,
const char* call_stack, const char* line_info,
const char* extdata)
{
std::lock_guard<std::mutex> lock(_data_mutex);
auto it = _category_map.find(category_id);
if(it == _category_map.end())
{
std::ostringstream oss;
oss << "Insert event failed! Error: Unknown category id: " << category_id
<< " for UPID: " << _upid;
throw std::runtime_error(oss.str());
}
ROCPROFSYS_VERBOSE(3, "Insert event category id: %ld, string id: %ld\n", category_id,
it->second);
_insert_event_statement(_upid.c_str(), it->second, stack_id, parent_stack_id,
_insert_event_statement(_upid.c_str(), string_primary_key, stack_id, parent_stack_id,
correlation_id, call_stack, line_info, extdata);
return data_storage::database::get_instance().get_last_insert_id();
}
@@ -456,7 +443,6 @@ void
data_processor::insert_args(size_t event_id, size_t position, const char* type,
const char* name, const char* value, const char* extdata)
{
std::lock_guard<std::mutex> lock(_data_mutex);
_insert_args_statement(_upid.c_str(), event_id, position, type, name, value, extdata);
}
@@ -464,40 +450,24 @@ void
data_processor::insert_stream_info(size_t stream_id, size_t node_id, size_t process_id,
const char* name, const char* extdata)
{
if(_stream_ids.count(stream_id) > 0)
{
// ROCPROFSYS_WARNING(
// 1, "Insert stream info failed! Error: Stream ID %ld already exists!\n",
// stream_id);
return;
}
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
query.set_table_name("rocpd_info_stream_" + _upid)
.set_columns("id", "guid", "nid", "pid", "name", "extdata")
.set_values(stream_id, _upid, node_id, process_id, name, extdata)
.get_query_string());
_stream_ids.insert(stream_id);
}
void
data_processor::insert_queue_info(size_t queue_id, size_t node_id, size_t process_id,
const char* name, const char* extdata)
{
if(_queue_ids.count(queue_id) > 0)
{
// ROCPROFSYS_WARNING(
// 1, "Insert queue info failed! Error: Queue ID %ld already exists!\n",
// queue_id);
return;
}
data_storage::queries::table_insert_query query;
data_storage::database::get_instance().execute_query(
query.set_table_name("rocpd_info_queue_" + _upid)
.set_columns("id", "guid", "nid", "pid", "name", "extdata")
.set_values(queue_id, _upid, node_id, process_id, name, extdata)
.get_query_string());
_queue_ids.insert(queue_id);
}
void
@@ -506,20 +476,9 @@ data_processor::insert_code_object(size_t id, size_t node_id, size_t process_id,
uint64_t ld_size, uint64_t ld_delta,
const char* storage_type, const char* extdata)
{
if(_code_object_ids.count(id) > 0)
{
// ROCPROFSYS_WARNING(
// 1,
// "Insert code object info failed! Error: Code object ID %ld already
// exists!\n", id);
return;
}
ROCPROFSYS_VERBOSE(2, "Insert code object with ID: %ld\n", id);
std::lock_guard<std::mutex> lock(_data_mutex);
_insert_code_object_statement(id, _upid.c_str(), node_id, process_id, agent_id, uri,
ld_base, ld_size, ld_delta, storage_type, extdata);
_code_object_ids.insert(id);
}
void
@@ -530,40 +489,11 @@ data_processor::insert_kernel_symbol(
uint32_t private_segment_size, uint32_t sgrp_count, uint32_t arch_vgrp_count,
uint32_t accum_vgrp_count, const char* extdata)
{
if(_kernel_sym_ids.count(id) > 0)
{
// ROCPROFSYS_WARNING(
// 1,
// "Insert kernel symbol failed! Error: Kernel symbol ID %ld already
// exists!\n", id);
return;
}
ROCPROFSYS_VERBOSE(2, "Insert kernel symbol: %s with ID: %ld\n", name, id);
std::lock_guard<std::mutex> lock(_data_mutex);
_insert_kernel_symbol_statement(
id, _upid.c_str(), node_id, process_id, code_obj_id, name, display_name,
kernel_obj, kernarg_segmnt_size, kernarg_segment_alignment, group_segment_size,
private_segment_size, sgrp_count, arch_vgrp_count, accum_vgrp_count, extdata);
_kernel_sym_ids.insert(id);
}
void
data_processor::insert_category(size_t category_id, const char* name)
{
auto it = _category_map.find(category_id);
if(it != _category_map.end())
{
// ROCPROFSYS_WARNING(
// 1, "Insert category failed! Error: Category %s already exist!\n", name);
return;
}
auto name_id = insert_string(name);
std::lock_guard<std::mutex> lock(_data_mutex);
ROCPROFSYS_VERBOSE(2, "Insert category: name: %s, id: %ld, name id: %ld\n", name,
category_id, name_id);
_category_map.emplace(category_id, name_id);
}
void
@@ -571,7 +501,6 @@ data_processor::insert_region(size_t node_id, size_t process_id, size_t thread_i
uint64_t start, uint64_t end, size_t name_id,
size_t event_id, const char* extdata)
{
std::lock_guard<std::mutex> lock(_data_mutex);
ROCPROFSYS_VERBOSE(2, "Insert region for event id: %ld\n", event_id);
_insert_region_statement(_upid.c_str(), node_id, process_id, thread_id, start, end,
@@ -587,8 +516,6 @@ data_processor::insert_kernel_dispatch(
size_t grid_size_x, size_t grid_size_y, size_t grid_size_z, size_t region_name_id,
size_t event_id, const char* extdata)
{
std::lock_guard<std::mutex> lock(_data_mutex);
ROCPROFSYS_VERBOSE(2, "Insert kernel dispatch for event id: %ld\n", event_id);
_insert_kernel_dispatch_statement(
@@ -607,8 +534,6 @@ data_processor::insert_memory_copy(size_t node_id, size_t process_id, size_t thr
size_t region_name_id, size_t event_id,
const char* extdata)
{
std::lock_guard<std::mutex> lock(_data_mutex);
_insert_memory_copy_statement(_upid.c_str(), node_id, process_id, thread_id, start,
end, name_id, dst_agent_id, dst_addr, src_agent_id,
src_addr, size, queue_id, stream_id, region_name_id,
@@ -663,6 +588,18 @@ data_processor::insert_thread_info(size_t node_id, size_t parent_process_id,
return thread_idx;
}
size_t
data_processor::map_thread_id_to_primary_key(size_t thread_id)
{
auto it = _thread_id_map.find(thread_id);
if(it == _thread_id_map.end())
{
throw std::invalid_argument("Given thread id don't exist");
}
return _thread_id_map.at(thread_id);
}
void
data_processor::flush()
{
+8 -15
Просмотреть файл
@@ -39,7 +39,7 @@ struct data_processor
using insert_event_stmt =
std::function<void(const char*, size_t, size_t, size_t, size_t, const char*,
const char*, const char*)>;
using insert_pmc_event_stms =
using insert_pmc_event_stmt =
std::function<void(const char*, size_t, size_t, double, const char*)>;
using insert_sample_stmt =
std::function<void(const char*, size_t, uint64_t, size_t, const char*)>;
@@ -124,9 +124,10 @@ public:
void insert_track(const char* track_name, size_t node_id, size_t process_id,
std::optional<size_t> thread_id, const char* extdata = "{}");
size_t insert_event(size_t category_id, size_t stack_id, size_t parent_stack_id,
size_t correlation_id, const char* call_stack = "{}",
const char* line_info = "{}", const char* extdata = "{}");
size_t insert_event(size_t string_primary_key, size_t stack_id,
size_t parent_stack_id, size_t correlation_id,
const char* call_stack = "{}", const char* line_info = "{}",
const char* extdata = "{}");
void insert_pmc_event(size_t event_id, size_t agent_id, const char* pmc_descriptor,
double value, const char* extdata = "{}");
@@ -143,8 +144,6 @@ public:
void insert_sample(const char* track, uint64_t timestamp, size_t event_id,
const char* extdata = "{}");
void insert_category(size_t category_id, const char* name);
void insert_region(size_t node_id, size_t process_id, size_t thread_id,
uint64_t start, uint64_t end, size_t name_id, size_t event_id,
const char* extdata = "{}");
@@ -199,6 +198,8 @@ public:
size_t stream_id, size_t event_id,
const char* extdata = "{}");
size_t map_thread_id_to_primary_key(size_t thread_id);
void flush();
private:
@@ -223,16 +224,10 @@ private:
std::unordered_map<pmc_identifier, size_t, pmc_identifier_hash, pmc_identifier_equal>
_pmc_descriptor_map;
std::unordered_map<size_t, size_t> _thread_id_map;
std::unordered_map<size_t, size_t> _category_map;
std::unordered_map<std::string, size_t> _string_map;
std::set<uint64_t> _code_object_ids;
std::set<uint64_t> _kernel_sym_ids;
std::set<uint64_t> _stream_ids;
std::set<uint64_t> _queue_ids;
insert_event_stmt _insert_event_statement;
insert_pmc_event_stms _insert_pmc_event_statement;
insert_pmc_event_stmt _insert_pmc_event_statement;
insert_sample_stmt _insert_sample_statement;
insert_region_stmt _insert_region_statement;
insert_kernel_dispatch_stmt _insert_kernel_dispatch_statement;
@@ -244,8 +239,6 @@ private:
insert_memory_alloc_no_agent_stmt _insert_memory_alloc_no_agent_statement;
std::string _upid{};
std::mutex _data_mutex;
};
} // namespace rocpd
+4 -3
Просмотреть файл
@@ -93,9 +93,8 @@ database::initialize_schema()
return new_file_path;
}
}
return std::string(
"rocprofiler-systems/source/lib/core/rocpd/data_storage/schema/")
.append(filename);
// TODO: Update to look for the system's rocpd schema
return std::string("source/lib/core/rocpd/data_storage/schema/").append(filename);
};
std::vector<std::string_view> schema_files = { "rocpd_tables.sql", "rocpd_views.sql",
@@ -118,9 +117,11 @@ database::initialize_schema()
std::string query = ss_query.str();
std::regex upid_pattern("\\{\\{uuid\\}\\}");
std::regex guid_pattern("\\{\\{guid\\}\\}");
std::regex view_upid_pattern("\\{\\{view_upid\\}\\}");
query = std::regex_replace(query, upid_pattern, "_" + get_upid());
query = std::regex_replace(query, guid_pattern, get_upid());
query = std::regex_replace(query, view_upid_pattern, "");
validate_sqlite3_result(
+13
Просмотреть файл
@@ -495,6 +495,9 @@ get_buffered_domains()
const auto supported = std::unordered_set<rocprofiler_buffer_tracing_kind_t>{
ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH,
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
# if(ROCPROFILER_VERSION >= 600)
ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION,
# endif
# if(ROCPROFILER_VERSION < 10000)
ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION,
# endif
@@ -540,6 +543,16 @@ get_buffered_domains()
{
_data.emplace(ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API);
}
# if(ROCPROFILER_VERSION >= 600)
else if(itr == "memory_allocation")
{
_data.emplace(ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION);
}
# endif
else if(itr == "memory_copy")
{
_data.emplace(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY);
}
else
{
for(size_t idx = 0; idx < buffer_tracing_info.size(); ++idx)
+44
Просмотреть файл
@@ -0,0 +1,44 @@
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
set(trace_cache_sources
${CMAKE_CURRENT_LIST_DIR}/cache_manager.cpp
${CMAKE_CURRENT_LIST_DIR}/storage_parser.cpp
${CMAKE_CURRENT_LIST_DIR}/buffer_storage.cpp
${CMAKE_CURRENT_LIST_DIR}/metadata_registry.cpp
${CMAKE_CURRENT_LIST_DIR}/rocpd_post_processing.cpp
)
set(trace_cache_headers
${CMAKE_CURRENT_LIST_DIR}/cache_manager.hpp
${CMAKE_CURRENT_LIST_DIR}/storage_parser.hpp
${CMAKE_CURRENT_LIST_DIR}/buffer_storage.hpp
${CMAKE_CURRENT_LIST_DIR}/cache_utility.hpp
${CMAKE_CURRENT_LIST_DIR}/metadata_registry.hpp
${CMAKE_CURRENT_LIST_DIR}/rocpd_post_processing.hpp
${CMAKE_CURRENT_LIST_DIR}/sample_type.hpp
)
target_sources(
rocprofiler-systems-core-library
PRIVATE ${trace_cache_sources} ${trace_cache_headers}
)
+178
Просмотреть файл
@@ -0,0 +1,178 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "buffer_storage.hpp"
#include "PTL/Task.hh"
#include "PTL/TaskGroup.hh"
#include "PTL/ThreadPool.hh"
#include "debug.hpp"
#include "library/runtime.hpp"
#include <chrono>
#include <memory>
#include <mutex>
#include <stdexcept>
#include <unistd.h>
using namespace std::chrono_literals;
namespace rocprofsys
{
namespace trace_cache
{
namespace
{
constexpr auto CACHE_FILE_FLUSH_TIMEOUT = 10ms;
constexpr auto NUM_OF_THREADS = 1;
} // namespace
buffer_storage::buffer_storage(pid_t _pid)
{
ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
m_thread_pool = std::make_unique<PTL::ThreadPool>(NUM_OF_THREADS);
m_thread_pool->initialize_threadpool(NUM_OF_THREADS);
m_task_group = std::make_unique<PTL::TaskGroup<void>>(m_thread_pool.get());
m_task_group->exec([this, _pid]() {
std::ofstream _ofs(filename, std::ios::binary | std::ios::out);
if(!_ofs)
{
std::stringstream _ss;
_ss << "Error opening file for writing: " << filename;
throw std::runtime_error(_ss.str());
}
auto execute_flush = [&](std::ofstream& ofs, bool force = false) {
size_t _head, _tail;
{
std::lock_guard guard{ m_mutex };
_head = m_head;
_tail = m_tail;
if(_head == _tail)
{
return;
}
auto used_space =
m_head > m_tail ? (m_head - m_tail) : (buffer_size - m_tail + m_head);
if(!force && used_space < flush_threshold)
{
return;
}
m_tail = m_head;
}
if(_head > _tail)
{
ofs.write(reinterpret_cast<const char*>(m_buffer->data() + _tail),
_head - _tail);
}
else
{
ofs.write(reinterpret_cast<const char*>(m_buffer->data() + _tail),
buffer_size - _tail);
ofs.write(reinterpret_cast<const char*>(m_buffer->data()), _head);
}
};
ROCPROFSYS_DEBUG("Starting buffered storage flushing thread for pid %d",
static_cast<int>(_pid));
m_created_process = _pid;
std::mutex _shutdown_condition_mutex;
while(m_running)
{
execute_flush(_ofs);
std::unique_lock _lock{ _shutdown_condition_mutex };
m_shutdown_condition.wait_for(
_lock, std::chrono::milliseconds(CACHE_FILE_FLUSH_TIMEOUT),
[&]() { return !m_running; });
}
execute_flush(_ofs, true);
_ofs.close();
m_exit_finished = true;
m_exit_condition.notify_one();
});
}
void
buffer_storage::shutdown()
{
ROCPROFSYS_DEBUG("Buffer storage shutting down..");
m_running = false;
m_shutdown_condition.notify_all();
if(m_created_process != getpid())
{
ROCPROFSYS_DEBUG(
"Buffer storage is not created in same process as shutting down..");
return;
}
std::mutex _exit_mutex;
std::unique_lock _exit_lock{ _exit_mutex };
m_exit_condition.wait(_exit_lock, [&]() { return m_exit_finished; });
m_thread_pool->destroy_threadpool();
}
void
buffer_storage::fragment_memory()
{
auto* _data = m_buffer->data();
memset(_data + m_head, 0xFFFF, buffer_size - m_head);
*reinterpret_cast<entry_type*>(_data + m_head) = entry_type::fragmented_space;
size_t remaining_bytes = buffer_size - m_head - minimal_fragmented_memory_size;
*reinterpret_cast<size_t*>(_data + m_head + sizeof(entry_type)) = remaining_bytes;
m_head = 0;
}
uint8_t*
buffer_storage::reserve_memory_space(size_t len)
{
size_t _size;
{
std::lock_guard scope{ m_mutex };
if((m_head + len + minimal_fragmented_memory_size) > buffer_size)
{
fragment_memory();
}
_size = m_head;
m_head = m_head + len;
}
auto* _result = m_buffer->data() + _size;
memset(_result, 0, len);
return _result;
}
bool
buffer_storage::is_running() const
{
return m_running;
}
} // namespace trace_cache
} // namespace rocprofsys
+162
Просмотреть файл
@@ -0,0 +1,162 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "PTL/TaskGroup.hh"
#include "PTL/ThreadPool.hh"
#include "cache_utility.hpp"
#include "sample_type.hpp"
#include <cassert>
#include <condition_variable>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <mutex>
#include <stdexcept>
#include <stdint.h>
#include <string.h>
#include <thread>
#include <type_traits>
#include <PTL/PTL.hh>
#include <unistd.h>
namespace rocprofsys
{
namespace trace_cache
{
class cache_manager;
class buffer_storage
{
public:
static buffer_storage& get_instance();
template <typename... T>
void store(entry_type type, T&&... values)
{
if(!is_running())
{
throw std::runtime_error(
"Trying to use buffered storage while it is not running");
return;
}
constexpr bool is_supported_type = (supported_types::is_supported<T> && ...);
static_assert(is_supported_type, "Supported types are const char*, char*, "
"unsigned long, unsigned int, and int.");
auto arg_size = get_size(values...);
auto total_size = arg_size + sizeof(type) + sizeof(size_t);
auto* reserved_memory = reserve_memory_space(total_size);
size_t position = 0;
auto store_value = [&](const auto& val) {
using Type = decltype(val);
size_t len = 0;
auto* dest = reserved_memory + position;
if constexpr(std::is_same_v<std::decay_t<Type>, const char*>)
{
len = strlen(val) + 1;
std::memcpy(dest, val, len);
}
else
{
using ClearType = std::decay_t<decltype(val)>;
len = sizeof(ClearType);
*reinterpret_cast<ClearType*>(dest) = val;
}
position += len;
};
store_value(type);
store_value(arg_size);
(store_value(values), ...);
}
private:
friend class cache_manager;
buffer_storage(pid_t _pid);
void shutdown();
bool is_running() const;
void fragment_memory();
uint8_t* reserve_memory_space(size_t len);
template <typename... Types>
struct typelist
{
template <typename T>
constexpr static bool is_supported =
(std::is_same_v<std::decay_t<T>, Types> || ...);
};
using supported_types = typelist<const char*, char*, uint64_t, int32_t, uint32_t>;
template <typename T>
static constexpr bool is_string_literal_v =
std::is_same_v<std::decay_t<T>, const char*> ||
std::is_same_v<std::decay_t<T>, char*>;
template <typename T>
constexpr size_t get_size_impl(T&& val)
{
if constexpr(is_string_literal_v<T>)
{
size_t size = 0;
while(val[size] != '\0')
{
size++;
}
return ++size;
}
else
{
return sizeof(T);
}
}
template <typename... T>
constexpr size_t get_size(T&&... val)
{
auto total_size = 0;
((total_size += get_size_impl(val)), ...);
return total_size;
}
private:
std::mutex m_mutex;
std::condition_variable m_exit_condition;
bool m_exit_finished{ false };
bool m_running{ true };
std::condition_variable m_shutdown_condition;
std::unique_ptr<PTL::ThreadPool>m_thread_pool;
std::unique_ptr<PTL::TaskGroup<void>> m_task_group;
size_t m_head{ 0 };
size_t m_tail{ 0 };
std::unique_ptr<buffer_array_t> m_buffer{ std::make_unique<buffer_array_t>() };
pid_t m_created_process;
};
} // namespace trace_cache
} // namespace rocprofsys
+79
Просмотреть файл
@@ -0,0 +1,79 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "cache_manager.hpp"
#include "core/config.hpp"
#include "core/trace_cache/storage_parser.hpp"
#include "debug.hpp"
#include "trace_cache/rocpd_post_processing.hpp"
namespace rocprofsys
{
namespace trace_cache
{
cache_manager&
cache_manager::get_instance()
{
static cache_manager instance;
return instance;
}
cache_manager::cache_manager()
: m_postprocessing{ m_metadata }
{
m_postprocessing.register_parser_callback(m_parser);
}
void
cache_manager::post_process()
{
if(m_storage.is_running())
{
ROCPROFSYS_WARNING(2, "Postprocessing called without previously shutting down "
"cache storage. Calling shutdown explicitly..\n");
shutdown();
}
if(get_use_rocpd())
{
ROCPROFSYS_PRINT(
"Generating rocpd with collected data. This may take a while..\n");
}
post_process_metadata();
m_parser.consume_storage();
}
void
cache_manager::post_process_metadata()
{
m_postprocessing.post_process_metadata();
}
void
cache_manager::shutdown()
{
m_storage.shutdown();
}
} // namespace trace_cache
} // namespace rocprofsys
+67
Просмотреть файл
@@ -0,0 +1,67 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "buffer_storage.hpp"
#include "core/trace_cache/rocpd_post_processing.hpp"
#include "metadata_registry.hpp"
#include "storage_parser.hpp"
namespace rocprofsys
{
namespace trace_cache
{
class cache_manager
{
public:
static cache_manager& get_instance();
buffer_storage& get_buffer_storage() { return m_storage; }
metadata_registry& get_metadata_regsitry() { return m_metadata; }
void shutdown();
void post_process();
private:
void post_process_metadata();
cache_manager();
buffer_storage m_storage{ getpid() };
metadata_registry m_metadata;
storage_parser m_parser{ getpid() };
rocpd_post_processing m_postprocessing;
};
inline metadata_registry&
get_metadata_registry()
{
return cache_manager::get_instance().get_metadata_regsitry();
}
inline buffer_storage&
get_buffer_storage()
{
return cache_manager::get_instance().get_buffer_storage();
}
} // namespace trace_cache
} // namespace rocprofsys
+45
Просмотреть файл
@@ -0,0 +1,45 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "sample_type.hpp"
#include <array>
#include <string>
#include <timemory/units.hpp>
#include <unistd.h>
namespace rocprofsys
{
namespace trace_cache
{
constexpr size_t buffer_size = 100 * tim::units::megabyte;
constexpr size_t flush_threshold = 80 * tim::units::megabyte;
const auto filename = "/tmp/buffered_storage_" + std::to_string(getpid()) + ".bin";
constexpr size_t minimal_fragmented_memory_size = sizeof(entry_type) + sizeof(size_t);
using buffer_array_t = std::array<uint8_t, buffer_size>;
constexpr auto ABSOLUTE = "ABS";
constexpr auto PERCENTAGE = "%";
} // namespace trace_cache
} // namespace rocprofsys
+296
Просмотреть файл
@@ -0,0 +1,296 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "metadata_registry.hpp"
#include <algorithm>
#include <cstdint>
namespace rocprofsys
{
namespace trace_cache
{
namespace
{
template <typename ReturnType, typename DataType, typename Filter>
std::optional<ReturnType>
get_type_info(const DataType& data, const Filter& filter)
{
std::optional<ReturnType> result = std::nullopt;
data.rlock([&filter, &result](const auto& _data) {
auto it = std::find_if(_data.begin(), _data.end(), filter);
result = it == _data.end() ? std::nullopt : std::optional<ReturnType>(*it);
});
return result;
}
template <typename T>
auto
assign_set_to_vector(T& result)
{
return [&result](const auto& _data) { result.assign(_data.cbegin(), _data.cend()); };
}
} // namespace
void
metadata_registry::set_process(const info::process& process)
{
m_process.wlock([&process](auto& _process) { _process = process; });
}
void
metadata_registry::add_pmc_info(const info::pmc& pmc_info)
{
m_pmc_infos.wlock([&pmc_info](auto& _data) {
if(_data.count(pmc_info) > 0)
{
return;
}
_data.emplace(pmc_info);
});
}
void
metadata_registry::add_thread_info(const info::thread& thread_info)
{
m_threads.wlock([&thread_info](auto& _data) {
if(_data.count(thread_info) > 0)
{
return;
}
_data.emplace(thread_info);
});
}
void
metadata_registry::add_track(const info::track& track_info)
{
m_tracks.wlock([&track_info](auto& _data) {
if(_data.count(track_info) > 0)
{
return;
}
_data.emplace(track_info);
});
}
void
metadata_registry::add_queue(const uint64_t& queue_handle)
{
m_queues.wlock([&queue_handle](auto& _data) {
if(_data.count(queue_handle) > 0)
{
return;
}
_data.emplace(queue_handle);
});
}
void
metadata_registry::add_stream(const uint64_t& stream_handle)
{
m_streams.wlock([&stream_handle](auto& _data) {
if(_data.count(stream_handle) > 0)
{
return;
}
_data.emplace(stream_handle);
});
}
void
metadata_registry::add_string(const std::string_view& string_value)
{
m_strings.wlock([&string_value](auto& _data) {
if(_data.count(string_value) > 0)
{
return;
}
_data.emplace(string_value);
});
}
info::process
metadata_registry::get_process_info() const
{
info::process result;
m_process.rlock([&result](const auto& _process) { result = _process; });
return result;
}
std::optional<info::pmc>
metadata_registry::get_pmc_info(const std::string_view& unique_name) const
{
return get_type_info<info::pmc>(m_pmc_infos, [&unique_name](const info::pmc& val) {
return val.name == unique_name;
});
}
std::optional<info::thread>
metadata_registry::get_thread_info(const uint32_t& thread_id) const
{
return get_type_info<info::thread>(m_threads, [&thread_id](const info::thread& val) {
return val.thread_id == thread_id;
});
}
std::optional<info::track>
metadata_registry::get_track_info(const std::string_view& track_name) const
{
return get_type_info<info::track>(m_tracks, [&track_name](const info::track& val) {
return val.track_name == track_name;
});
}
std::vector<info::pmc>
metadata_registry::get_pmc_info_list() const
{
std::vector<info::pmc> result;
m_pmc_infos.rlock(assign_set_to_vector(result));
return result;
}
std::vector<info::thread>
metadata_registry::get_thread_info_list() const
{
std::vector<info::thread> result;
m_threads.rlock(assign_set_to_vector(result));
return result;
}
std::vector<info::track>
metadata_registry::get_track_info_list() const
{
std::vector<info::track> result;
m_tracks.rlock(assign_set_to_vector(result));
return result;
}
std::vector<uint64_t>
metadata_registry::get_queue_list() const
{
std::vector<uint64_t> result;
m_queues.rlock(assign_set_to_vector(result));
return result;
}
std::vector<uint64_t>
metadata_registry::get_stream_list() const
{
std::vector<uint64_t> result;
m_streams.rlock(assign_set_to_vector(result));
return result;
}
std::vector<std::string_view>
metadata_registry::get_string_list() const
{
std::vector<std::string_view> result;
m_strings.rlock(assign_set_to_vector(result));
return result;
}
#if ROCPROFSYS_USE_ROCM
void
metadata_registry::add_code_object(
const rocprofiler_callback_tracing_code_object_load_data_t& code_object)
{
m_code_objects.wlock([&code_object](auto& _data) {
if(_data.count(code_object) > 0)
{
return;
}
_data.emplace(code_object);
});
}
void
metadata_registry::add_kernel_symbol(
const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t&
kernel_symbol)
{
m_kernel_symbols.wlock([&kernel_symbol](auto& _data) {
if(_data.count(kernel_symbol) > 0)
{
return;
}
_data.emplace(kernel_symbol);
});
}
std::optional<rocprofiler_callback_tracing_code_object_load_data_t>
metadata_registry::get_code_object(uint64_t code_object_id) const
{
return get_type_info<rocprofiler_callback_tracing_code_object_load_data_t>(
m_code_objects,
[&code_object_id](
const rocprofiler_callback_tracing_code_object_load_data_t& val) {
return val.code_object_id == code_object_id;
});
}
std::optional<rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t>
metadata_registry::get_kernel_symbol(uint64_t kernel_id) const
{
return get_type_info<
rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t>(
m_kernel_symbols,
[&kernel_id](
const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t&
val) { return val.kernel_id == kernel_id; });
}
std::vector<rocprofiler_callback_tracing_code_object_load_data_t>
metadata_registry::get_code_object_list() const
{
std::vector<rocprofiler_callback_tracing_code_object_load_data_t> result;
m_code_objects.rlock(assign_set_to_vector(result));
return result;
}
std::vector<rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t>
metadata_registry::get_kernel_symbol_list() const
{
std::vector<rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t>
result;
m_kernel_symbols.rlock(assign_set_to_vector(result));
return result;
}
rocprofiler::sdk::buffer_name_info_t<const char*>
metadata_registry::get_buffer_name_info() const
{
return m_buffered_tracing_info;
}
rocprofiler::sdk::callback_name_info_t<const char*>
metadata_registry::get_callback_tracing_info() const
{
return m_callback_tracing_info;
}
#endif
} // namespace trace_cache
} // namespace rocprofsys
+218
Просмотреть файл
@@ -0,0 +1,218 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "common/synchronized.hpp"
#include "core/agent.hpp"
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <optional>
#if ROCPROFSYS_USE_ROCM > 0
# include <rocprofiler-sdk/callback_tracing.h>
# include <rocprofiler-sdk/cxx/name_info.hpp>
#endif
#include <set>
#include <stdint.h>
#include <string.h>
#include <string>
#include <sys/types.h>
#include <unordered_set>
namespace rocprofsys
{
namespace trace_cache
{
namespace info
{
struct process
{
pid_t pid; // < Unique
pid_t ppid;
std::string command;
};
struct pmc
{
agent_type type;
size_t agent_type_index;
std::string target_arch;
size_t event_code;
size_t instance_id;
std::string name; // < Unique
std::string symbol;
std::string description;
std::string long_description;
std::string component;
std::string units;
std::string value_type;
std::string block;
std::string expression;
uint32_t is_constant;
uint32_t is_derived;
std::string extdata;
};
struct pmc_info_hash
{
std::size_t operator()(const pmc& _pmc) const noexcept
{
std::size_t h1 = std::hash<size_t>{}(static_cast<size_t>(_pmc.type));
std::size_t h2 = std::hash<size_t>{}(_pmc.agent_type_index);
std::size_t h3 = std::hash<std::string>{}(_pmc.name);
return h1 ^ (h2 << 1) ^ (h3 << 1);
}
};
struct pmc_info_equal
{
bool operator()(const pmc& lhs, const pmc& rhs) const noexcept
{
return lhs.type == rhs.type && lhs.agent_type_index == rhs.agent_type_index &&
lhs.name == rhs.name;
}
};
struct thread
{
int32_t parent_process_id;
int32_t process_id;
uint64_t thread_id; // < Unique
uint32_t start;
uint32_t end;
std::string extdata;
friend bool operator<(const thread& lhs, const thread& rhs)
{
return lhs.thread_id < rhs.thread_id;
}
};
struct track
{
std::string track_name; // < Unique
std::optional<size_t> thread_id;
std::string extdata;
friend bool operator<(const track& lhs, const track& rhs)
{
return lhs.track_name.compare(rhs.track_name) < 0;
}
};
#if ROCPROFSYS_USE_ROCM > 0
struct code_object_less
{
bool operator()(const rocprofiler_callback_tracing_code_object_load_data_t& lhs,
const rocprofiler_callback_tracing_code_object_load_data_t& rhs) const
{
return lhs.code_object_id < rhs.code_object_id;
}
};
struct kernel_symbol_less
{
bool operator()(
const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t& lhs,
const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t& rhs)
const
{
return lhs.kernel_object < rhs.kernel_object;
}
};
#endif
} // namespace info
class cache_manager;
struct metadata_registry
{
void set_process(const info::process& process);
void add_pmc_info(const info::pmc& pmc_info);
void add_thread_info(const info::thread& thread_info);
void add_track(const info::track& track_info);
void add_queue(const uint64_t& queue_handle);
void add_stream(const uint64_t& stream_handle);
void add_string(const std::string_view& string_value);
info::process get_process_info() const;
std::optional<info::pmc> get_pmc_info(const std::string_view& unique_name) const;
std::optional<info::thread> get_thread_info(const uint32_t& thread_id) const;
std::optional<info::track> get_track_info(const std::string_view& track_name) const;
std::vector<info::pmc> get_pmc_info_list() const;
std::vector<info::thread> get_thread_info_list() const;
std::vector<info::track> get_track_info_list() const;
std::vector<uint64_t> get_queue_list() const;
std::vector<uint64_t> get_stream_list() const;
std::vector<std::string_view> get_string_list() const;
#if ROCPROFSYS_USE_ROCM > 0
void add_code_object(
const rocprofiler_callback_tracing_code_object_load_data_t& code_object);
void add_kernel_symbol(
const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t&
kernel_symbol);
std::vector<rocprofiler_callback_tracing_code_object_load_data_t>
get_code_object_list() const;
std::vector<rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t>
get_kernel_symbol_list() const;
std::optional<rocprofiler_callback_tracing_code_object_load_data_t> get_code_object(
uint64_t code_object_id) const;
std::optional<rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t>
get_kernel_symbol(uint64_t kernel_id) const;
rocprofiler::sdk::buffer_name_info_t<const char*> get_buffer_name_info() const;
rocprofiler::sdk::callback_name_info_t<const char*> get_callback_tracing_info() const;
#endif
private:
friend class cache_manager;
metadata_registry() = default;
common::synchronized<info::process> m_process;
common::synchronized<
std::unordered_set<info::pmc, info::pmc_info_hash, info::pmc_info_equal>>
m_pmc_infos;
common::synchronized<std::set<info::thread>> m_threads;
common::synchronized<std::set<info::track>> m_tracks;
common::synchronized<std::set<uint64_t>> m_streams;
common::synchronized<std::set<uint64_t>> m_queues;
common::synchronized<std::unordered_set<std::string_view>> m_strings;
#if ROCPROFSYS_USE_ROCM > 0
common::synchronized<std::set<rocprofiler_callback_tracing_code_object_load_data_t,
info::code_object_less>>
m_code_objects;
common::synchronized<
std::set<rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t,
info::kernel_symbol_less>>
m_kernel_symbols;
rocprofiler::sdk::buffer_name_info_t<const char*> m_buffered_tracing_info{
rocprofiler::sdk::get_buffer_tracing_names<const char*>()
};
rocprofiler::sdk::callback_name_info_t<const char*> m_callback_tracing_info{
rocprofiler::sdk::get_callback_tracing_names<const char*>()
};
#endif
};
} // namespace trace_cache
} // namespace rocprofsys
+574
Просмотреть файл
@@ -0,0 +1,574 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "trace_cache/rocpd_post_processing.hpp"
#include "agent_manager.hpp"
#include "common.hpp"
#include "config.hpp"
#include "debug.hpp"
#include "library/thread_info.hpp"
#include "node_info.hpp"
#include "rocpd/data_processor.hpp"
#include "trace_cache/metadata_registry.hpp"
#include "trace_cache/sample_type.hpp"
#include "trace_cache/storage_parser.hpp"
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <string>
#include <timemory/utility/demangle.hpp>
#if ROCPROFSYS_USE_ROCM > 0
# include "library/rocprofiler-sdk/fwd.hpp"
# include <rocprofiler-sdk/context.h>
# include <rocprofiler-sdk/version.h>
#endif
namespace rocprofsys
{
namespace trace_cache
{
namespace
{
rocpd::data_processor&
get_data_processor()
{
return rocpd::data_processor::get_instance();
}
#if ROCPROFSYS_USE_ROCM > 0
auto
get_handle_from_code_object(
const rocprofiler_callback_tracing_code_object_load_data_t& code_object)
{
# if(ROCPROFILER_VERSION >= 600)
return code_object.agent_id.handle;
# else
return code_object.rocp_agent.handle;
# endif
}
#endif
} // namespace
postprocessing_callback
rocpd_post_processing::get_kernel_dispatch_callback() const
{
return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) {
#if ROCPROFSYS_USE_ROCM > 0
auto _kds = static_cast<const struct kernel_dispatch_sample&>(parsed);
auto& data_processor = get_data_processor();
auto& agent_manager = agent_manager::get_instance();
auto& n_info = node_info::get_instance();
auto process = m_metadata.get_process_info();
auto agent_primary_key =
agent_manager.get_agent_by_handle(_kds.agent_id_handle).base_id;
auto thread_primary_key =
data_processor.map_thread_id_to_primary_key(_kds.thread_id);
auto category_id = data_processor.insert_string(
trait::name<category::rocm_kernel_dispatch>::value);
auto kernel_symbol = m_metadata.get_kernel_symbol(_kds.kernel_id);
if(!kernel_symbol.has_value())
{
throw std::runtime_error("Kernel symbol is missing for kernel dispatch");
return;
}
auto region_name_primary_key = data_processor.insert_string(
tim::demangle(kernel_symbol->kernel_name).c_str());
auto stack_id = _kds.correlation_id_internal;
auto parent_stack_id = _kds.correlation_id_ancestor;
auto correlation_id = 0;
auto event_id = data_processor.insert_event(category_id, stack_id,
parent_stack_id, correlation_id);
data_processor.insert_kernel_dispatch(
n_info.id, process.pid, thread_primary_key, agent_primary_key, _kds.kernel_id,
_kds.dispatch_id, _kds.queue_id_handle, _kds.stream_handle,
_kds.start_timestamp, _kds.end_timestamp, _kds.private_segment_size,
_kds.group_segment_size, _kds.workgroup_size_x, _kds.workgroup_size_y,
_kds.workgroup_size_z, _kds.grid_size_x, _kds.grid_size_y, _kds.grid_size_z,
region_name_primary_key, event_id);
#endif
};
}
postprocessing_callback
rocpd_post_processing::get_memory_copy_callback() const
{
return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) {
#if ROCPROFSYS_USE_ROCM > 0
auto _mcs = static_cast<const struct memory_copy_sample&>(parsed);
auto& data_processor = get_data_processor();
auto& agent_manager = agent_manager::get_instance();
auto& n_info = node_info::get_instance();
auto process = m_metadata.get_process_info();
auto _name = std::string{ m_metadata.get_buffer_name_info().at(
static_cast<rocprofiler_buffer_tracing_kind_t>(_mcs.kind),
static_cast<rocprofiler_tracing_operation_t>(_mcs.operation)) };
auto name_primary_key = data_processor.insert_string(_name.c_str());
auto category_primary_key =
data_processor.insert_string(trait::name<category::rocm_memory_copy>::value);
auto thread_primary_key =
data_processor.map_thread_id_to_primary_key(_mcs.thread_id);
auto dst_agent_primary_key =
agent_manager.get_agent_by_handle(_mcs.dst_agent_id_handle).base_id;
auto src_agent_primary_key =
agent_manager.get_agent_by_handle(_mcs.src_agent_id_handle).base_id;
auto stack_id = _mcs.correlation_id_internal;
auto parent_stack_id = _mcs.correlation_id_ancestor;
auto correlation_id = 0;
auto queue_id = 0;
auto event_primary_key = data_processor.insert_event(
category_primary_key, stack_id, parent_stack_id, correlation_id);
data_processor.insert_memory_copy(
n_info.id, process.pid, thread_primary_key, _mcs.start_timestamp,
_mcs.end_timestamp, name_primary_key, dst_agent_primary_key,
_mcs.dst_address_value, src_agent_primary_key, _mcs.src_address_value,
_mcs.bytes, queue_id, _mcs.stream_handle, name_primary_key,
event_primary_key);
#endif
};
}
#if(ROCPROFSYS_USE_ROCM > 0 && ROCPROFILER_VERSION >= 600)
postprocessing_callback
rocpd_post_processing::get_memory_allocate_callback() const
{
# if ROCPROFSYS_USE_ROCM > 0
auto memtype_to_db =
[](std::string_view memory_type) -> std::pair<std::string, std::string> {
constexpr auto MEMORY_PREFIX = std::string_view{ "MEMORY_ALLOCATION_" };
constexpr auto SCRATCH_PREFIX = std::string_view{ "SCRATCH_MEMORY_" };
constexpr auto VMEM_PREFIX = std::string_view{ "VMEM_" };
constexpr auto ASYNC_PREFIX = std::string_view{ "ASYNC_" };
std::string _type;
std::string _level;
if(memory_type.find(MEMORY_PREFIX) == 0)
{
_type = memory_type.substr(MEMORY_PREFIX.length());
if(_type.find(VMEM_PREFIX) == 0)
{
_type = _type.substr(VMEM_PREFIX.length());
_level = "VIRTUAL";
}
else
{
_level = "REAL";
}
}
else if(memory_type.find(SCRATCH_PREFIX) == 0)
{
_type = memory_type.substr(SCRATCH_PREFIX.length());
_level = "SCRATCH";
if(memory_type.find(ASYNC_PREFIX) == 0)
{
_type = memory_type.substr(ASYNC_PREFIX.length()); // RECLAIM
}
}
if(_type == "ALLOCATE")
{
_type = "ALLOC";
}
return std::make_pair(_type, _level);
};
# endif
return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) {
# if ROCPROFSYS_USE_ROCM > 0
auto _mas = static_cast<const struct memory_allocate_sample&>(parsed);
auto& data_processor = get_data_processor();
auto& agent_manager = agent_manager::get_instance();
auto& n_info = node_info::get_instance();
auto process = m_metadata.get_process_info();
auto thread_primary_key =
data_processor.map_thread_id_to_primary_key(_mas.thread_id);
auto agent_primary_key = std::optional<uint64_t>{};
const auto invalid_context = ROCPROFILER_CONTEXT_NONE;
if(_mas.agent_id_handle != invalid_context.handle)
{
{
agent_primary_key =
agent_manager.get_agent_by_handle(_mas.agent_id_handle).base_id;
}
const auto* _name = m_metadata.get_buffer_name_info().at(
static_cast<rocprofiler_buffer_tracing_kind_t>(_mas.kind),
static_cast<rocprofiler_tracing_operation_t>(_mas.operation));
auto [type, level] = memtype_to_db(_name);
auto stack_id = _mas.correlation_id_internal;
auto parent_stack_id = _mas.correlation_id_ancestor;
auto correlation_id = 0;
auto queue_id = 0;
auto category_primary_key = data_processor.insert_string(
trait::name<category::rocm_memory_allocate>::value);
auto event_primary_key = data_processor.insert_event(
category_primary_key, stack_id, parent_stack_id, correlation_id);
data_processor.insert_memory_alloc(
n_info.id, process.pid, thread_primary_key, agent_primary_key,
type.c_str(), level.c_str(), _mas.start_timestamp, _mas.end_timestamp,
_mas.address_value, _mas.allocation_size, queue_id, _mas.stream_handle,
event_primary_key);
# endif
};
};
}
#endif
postprocessing_callback
rocpd_post_processing::get_region_callback() const
{
[[maybe_unused]] auto parse_args = []([[maybe_unused]] const std::string& arg_str) {
#if ROCPROFSYS_USE_ROCM > 0
rocprofiler_sdk::function_args_t args;
const std::string delimiter = ";;";
auto split = [](const std::string& str, const std::string& _delimiter) {
std::vector<std::string> tokens;
size_t start = 0;
size_t end = str.find(_delimiter);
while(end != std::string::npos)
{
tokens.push_back(str.substr(start, end - start));
start = end + _delimiter.length();
end = str.find(_delimiter, start);
}
return tokens;
};
auto tokens = split(arg_str, delimiter);
// Ensure the number of tokens is a multiple of 4
if(tokens.size() % 4 != 0)
{
throw std::invalid_argument("Malformed argument string.");
}
for(auto it = tokens.begin(); it != tokens.end(); it += 4)
{
rocprofiler_sdk::argument_info arg = { static_cast<uint32_t>(std::stoi(*it)),
*(it + 1), *(it + 2), *(it + 3) };
args.push_back(arg);
}
return args;
#endif
};
return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) {
#if ROCPROFSYS_USE_ROCM > 0
auto _rs = static_cast<const struct region_sample&>(parsed);
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
auto process = m_metadata.get_process_info();
auto thread_primary_key =
data_processor.map_thread_id_to_primary_key(_rs.thread_id);
auto callback_tracing_info = m_metadata.get_callback_tracing_info();
auto _name = std::string{ callback_tracing_info.at(
static_cast<rocprofiler_callback_tracing_kind_t>(_rs.kind),
static_cast<rocprofiler_tracing_operation_t>(_rs.operation)) };
auto name_primary_key = data_processor.insert_string(_name.c_str());
auto category_primary_key = data_processor.insert_string(_rs.category.c_str());
size_t stack_id = _rs.correlation_id_internal;
size_t parent_stack_id = _rs.correlation_id_ancestor;
size_t correlation_id = 0;
auto event_primary_key =
data_processor.insert_event(category_primary_key, stack_id, parent_stack_id,
correlation_id, _rs.call_stack.c_str());
auto args = parse_args(_rs.args_str);
for(const auto& arg : args)
{
data_processor.insert_args(event_primary_key, arg.arg_number,
arg.arg_type.c_str(), arg.arg_name.c_str(),
arg.arg_value.c_str());
}
data_processor.insert_region(n_info.id, process.pid, thread_primary_key,
_rs.start_timestamp, _rs.end_timestamp,
name_primary_key, event_primary_key);
#endif
};
}
postprocessing_callback
rocpd_post_processing::get_in_time_sample_callback() const
{
return [&](const storage_parsed_type_base& parsed) {
auto _its = static_cast<const struct in_time_sample&>(parsed);
auto& data_processor = get_data_processor();
auto track_primary_key = data_processor.insert_string(_its.track_name.c_str());
auto event_id = data_processor.insert_event(
track_primary_key, _its.stack_id, _its.parent_stack_id, _its.correlation_id,
_its.call_stack.c_str(), _its.line_info.c_str(), _its.event_metadata.c_str());
data_processor.insert_sample(_its.track_name.c_str(), _its.timestamp_ns, event_id,
"{}");
};
}
postprocessing_callback
rocpd_post_processing::get_pmc_event_with_sample_callback() const
{
return [&](const storage_parsed_type_base& parsed) {
auto _pmc = static_cast<const struct pmc_event_with_sample&>(parsed);
auto& data_processor = get_data_processor();
auto track_primary_key = data_processor.insert_string(_pmc.track_name.c_str());
auto& agent_manager = agent_manager::get_instance();
auto agent_primary_key =
agent_manager.get_agent_by_handle(_pmc.agent_handle).base_id;
auto event_id = data_processor.insert_event(
track_primary_key, _pmc.stack_id, _pmc.parent_stack_id, _pmc.correlation_id,
_pmc.call_stack.c_str(), _pmc.line_info.c_str(), _pmc.event_metadata.c_str());
data_processor.insert_sample(_pmc.track_name.c_str(), _pmc.timestamp_ns, event_id,
"{}");
data_processor.insert_pmc_event(event_id, agent_primary_key,
_pmc.pmc_info_name.c_str(), _pmc.value);
};
}
rocpd_post_processing::rocpd_post_processing(metadata_registry& md)
: m_metadata(md)
{}
void
rocpd_post_processing::register_parser_callback([[maybe_unused]] storage_parser& parser)
{
#if ROCPROFSYS_USE_ROCM > 0
if(!get_use_rocpd())
{
return;
}
parser.register_type_callback(entry_type::region, get_region_callback());
parser.register_type_callback(entry_type::kernel_dispatch,
get_kernel_dispatch_callback());
parser.register_type_callback(entry_type::memory_copy, get_memory_copy_callback());
# if(ROCPROFILER_VERSION >= 600)
parser.register_type_callback(entry_type::memory_alloc,
get_memory_allocate_callback());
# endif
parser.register_type_callback(entry_type::in_time_sample,
get_in_time_sample_callback());
parser.register_type_callback(entry_type::pmc_event_with_sample,
get_pmc_event_with_sample_callback());
ROCPROFSYS_DEBUG("Buffer parser callbacks are registered..");
#endif
}
void
rocpd_post_processing::post_process_metadata()
{
#if ROCPROFSYS_USE_ROCM > 0
if(!get_use_rocpd())
{
return;
}
ROCPROFSYS_DEBUG("Post processing metadata..");
auto& data_processor = get_data_processor();
auto& agent_mngr = agent_manager::get_instance();
auto n_info = node_info::get_instance();
data_processor.insert_node_info(n_info.id, n_info.hash, n_info.machine_id.c_str(),
n_info.system_name.c_str(), n_info.node_name.c_str(),
n_info.release.c_str(), n_info.version.c_str(),
n_info.machine.c_str(), n_info.domain_name.c_str());
auto process_info = m_metadata.get_process_info();
data_processor.insert_process_info(n_info.id, process_info.ppid, process_info.pid, 0,
0, 0, 0, process_info.command.c_str(), "{}");
const auto& agents = agent_mngr.get_agents();
int counter = 0;
for(const auto& rocpd_agent : agents)
{
auto _base_id = rocpd::data_processor::get_instance().insert_agent(
n_info.id, process_info.pid,
((rocpd_agent->type == agent_type::GPU) ? "GPU" : "CPU"), counter++,
rocpd_agent->logical_node_id, rocpd_agent->logical_node_type_id,
rocpd_agent->device_id, rocpd_agent->name.c_str(),
rocpd_agent->model_name.c_str(), rocpd_agent->vendor_name.c_str(),
rocpd_agent->product_name.c_str(), "");
rocpd_agent->base_id = _base_id;
}
auto _string_list = m_metadata.get_string_list();
for(auto& _string : _string_list)
{
data_processor.insert_string(std::string(_string).c_str());
}
auto _thread_info_list = m_metadata.get_thread_info_list();
for(auto& t_info : _thread_info_list)
{
rocpd_insert_thread_id(t_info, n_info, process_info);
}
auto _track_info_list = m_metadata.get_track_info_list();
for(auto& track : _track_info_list)
{
auto thread_id =
track.thread_id.has_value()
? std::make_optional<size_t>(data_processor.map_thread_id_to_primary_key(
track.thread_id.value()))
: std::nullopt;
data_processor.insert_track(track.track_name.c_str(), n_info.id, process_info.pid,
thread_id);
}
auto _code_object_list = m_metadata.get_code_object_list();
for(const auto& code_object : _code_object_list)
{
auto dev_id =
agent_mngr.get_agent_by_handle(get_handle_from_code_object(code_object))
.base_id;
const char* strg_type = "UNKNOWN";
switch(code_object.storage_type)
{
case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE: strg_type = "FILE"; break;
case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY: strg_type = "MEMORY"; break;
default: break;
}
data_processor.insert_code_object(code_object.code_object_id, n_info.id,
process_info.pid, dev_id, code_object.uri,
code_object.load_base, code_object.load_size,
code_object.load_delta, strg_type);
}
auto _kernel_symbols_list = m_metadata.get_kernel_symbol_list();
for(const auto& kernel_symbol : _kernel_symbols_list)
{
auto kernel_name = tim::demangle(kernel_symbol.kernel_name);
data_processor.insert_kernel_symbol(
kernel_symbol.kernel_id, n_info.id, process_info.pid,
kernel_symbol.code_object_id, kernel_symbol.kernel_name, kernel_name.c_str(),
kernel_symbol.kernel_object, kernel_symbol.kernarg_segment_size,
kernel_symbol.kernarg_segment_alignment, kernel_symbol.group_segment_size,
kernel_symbol.private_segment_size, kernel_symbol.sgpr_count,
kernel_symbol.arch_vgpr_count, kernel_symbol.accum_vgpr_count);
data_processor.insert_string(kernel_name.c_str());
}
auto _queue_list = m_metadata.get_queue_list();
for(const auto& queue_handle : _queue_list)
{
std::stringstream ss;
ss << "Queue " << queue_handle;
data_processor.insert_queue_info(queue_handle, n_info.id, process_info.pid,
ss.str().c_str());
}
auto _stream_list = m_metadata.get_stream_list();
for(const auto& stream_handle : _stream_list)
{
std::stringstream ss;
ss << "Stream " << stream_handle;
data_processor.insert_stream_info(stream_handle, n_info.id, process_info.pid,
ss.str().c_str());
}
auto buffer_info_list = m_metadata.get_buffer_name_info();
for(const auto& buffer_info : buffer_info_list)
{
for(const auto& item : buffer_info.items())
{
data_processor.insert_string(*item.second);
}
}
auto callback_info_list = m_metadata.get_callback_tracing_info();
for(const auto& cb_info : callback_info_list)
{
for(const auto& item : cb_info.items())
{
data_processor.insert_string(*item.second);
}
}
auto pmc_info_list = m_metadata.get_pmc_info_list();
for(const auto& pmc_info : pmc_info_list)
{
const auto agent_primary_key =
agent_mngr.get_agent_by_type_index(pmc_info.agent_type_index, pmc_info.type)
.base_id;
data_processor.insert_pmc_description(
n_info.id, process_info.pid, agent_primary_key, pmc_info.target_arch.c_str(),
pmc_info.event_code, pmc_info.instance_id, pmc_info.name.c_str(),
pmc_info.symbol.c_str(), pmc_info.description.c_str(),
pmc_info.long_description.c_str(), pmc_info.component.c_str(),
pmc_info.units.c_str(), pmc_info.value_type.c_str(), pmc_info.block.c_str(),
pmc_info.expression.c_str(), pmc_info.is_constant, pmc_info.is_derived);
}
#endif
}
inline void
rocpd_post_processing::rocpd_insert_thread_id(info::thread& t_info,
const node_info& n_info,
const info::process& process_info) const
{
const auto& extended_info = thread_info::get(t_info.thread_id, SequentTID);
if(extended_info.has_value())
{
t_info.start = extended_info->get_start();
t_info.end = extended_info->get_stop();
}
std::stringstream ss;
ss << "Thread " << t_info.thread_id;
get_data_processor().insert_thread_info(n_info.id, process_info.ppid,
process_info.pid, t_info.thread_id,
ss.str().c_str(), t_info.start, t_info.end);
}
} // namespace trace_cache
} // namespace rocprofsys
+60
Просмотреть файл
@@ -0,0 +1,60 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "core/node_info.hpp"
#include "core/trace_cache/metadata_registry.hpp"
#include "core/trace_cache/storage_parser.hpp"
namespace rocprofsys
{
namespace trace_cache
{
class rocpd_post_processing
{
public:
rocpd_post_processing(metadata_registry& metadata);
void register_parser_callback(storage_parser& parser);
void post_process_metadata();
private:
using primary_key = size_t;
inline void rocpd_insert_thread_id(info::thread& t_info, const node_info& n_info,
const info::process& process_info) const;
postprocessing_callback get_kernel_dispatch_callback() const;
postprocessing_callback get_memory_copy_callback() const;
#if(ROCPROFILER_VERSION >= 600)
postprocessing_callback get_memory_allocate_callback() const;
#endif
postprocessing_callback get_region_callback() const;
postprocessing_callback get_in_time_sample_callback() const;
postprocessing_callback get_pmc_event_with_sample_callback() const;
metadata_registry& m_metadata;
};
} // namespace trace_cache
} // namespace rocprofsys
+198
Просмотреть файл
@@ -0,0 +1,198 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <stdint.h>
#include <string>
#include <unistd.h>
#include <utility>
#if ROCPROFSYS_USE_ROCM > 0
# include <rocprofiler-sdk/version.h>
#endif
namespace rocprofsys
{
namespace trace_cache
{
struct storage_parsed_type_base
{};
struct kernel_dispatch_sample : storage_parsed_type_base
{
// Timing fields
uint64_t start_timestamp;
uint64_t end_timestamp;
// Identification fields
uint64_t thread_id;
uint64_t agent_id_handle;
uint64_t kernel_id;
uint64_t dispatch_id;
uint64_t queue_id_handle;
// Correlation fields
uint64_t correlation_id_internal;
uint64_t correlation_id_ancestor;
// Dispatch configuration
uint32_t private_segment_size;
uint32_t group_segment_size;
uint32_t workgroup_size_x;
uint32_t workgroup_size_y;
uint32_t workgroup_size_z;
uint32_t grid_size_x;
uint32_t grid_size_y;
uint32_t grid_size_z;
// Stream handle
size_t stream_handle;
};
struct memory_copy_sample : storage_parsed_type_base
{
// Timing fields
uint64_t start_timestamp;
uint64_t end_timestamp;
// Identification fields
uint64_t thread_id;
uint64_t dst_agent_id_handle;
uint64_t src_agent_id_handle;
// Operation details
int32_t kind;
int32_t operation;
uint64_t bytes;
// Correlation fields
uint64_t correlation_id_internal;
uint64_t correlation_id_ancestor;
// Address fields (version dependent)
uint64_t dst_address_value;
uint64_t src_address_value;
// Stream handle
size_t stream_handle;
};
#if(ROCPROFILER_VERSION >= 600)
struct memory_allocate_sample : storage_parsed_type_base
{
// Timing fields
uint64_t start_timestamp;
uint64_t end_timestamp;
// Identification fields
uint64_t thread_id;
uint64_t agent_id_handle;
// Operation details
int32_t kind;
int32_t operation;
uint64_t allocation_size;
// Correlation fields
uint64_t correlation_id_internal;
uint64_t correlation_id_ancestor;
// Address fields (version dependent)
uint64_t address_value;
// Stream handle
size_t stream_handle;
};
#endif
struct region_sample : storage_parsed_type_base
{
region_sample() = default;
region_sample(uint64_t _thread_id, int32_t _kind, int32_t _operation,
uint64_t _correlation_id_internal, uint64_t _correlation_id_ancestor,
uint64_t _start_timestamp, uint64_t _end_timestamp,
std::string _call_stack, std::string _args_str, std::string _category)
: thread_id(_thread_id)
, kind(_kind)
, operation(_operation)
, correlation_id_internal(_correlation_id_internal)
, correlation_id_ancestor(_correlation_id_ancestor)
, start_timestamp(_start_timestamp)
, end_timestamp(_end_timestamp)
, call_stack(std::move(_call_stack))
, args_str(std::move(_args_str))
, category(std::move(_category))
{}
// Identification fields
uint64_t thread_id;
int32_t kind;
int32_t operation;
// Correlation fields
uint64_t correlation_id_internal;
uint64_t correlation_id_ancestor;
// Timing fields
uint64_t start_timestamp;
uint64_t end_timestamp;
// Additional fields
std::string call_stack;
std::string args_str;
std::string category;
};
struct in_time_sample : storage_parsed_type_base
{
std::string track_name;
size_t timestamp_ns;
std::string event_metadata;
size_t stack_id;
size_t parent_stack_id;
size_t correlation_id;
std::string call_stack;
std::string line_info;
};
struct pmc_event_with_sample : in_time_sample
{
size_t agent_handle;
std::string pmc_info_name;
size_t value;
};
enum class entry_type : uint32_t
{
in_time_sample = 0x0000,
pmc_event_with_sample = 0x0001,
region = 0x0002,
kernel_dispatch = 0x0003,
memory_copy = 0x0004,
#if(ROCPROFSYS_USE_ROCM && ROCPROFILER_VERSION >= 600)
memory_alloc = 0x0005,
#endif
fragmented_space = 0xFFFF
};
} // namespace trace_cache
} // namespace rocprofsys
+230
Просмотреть файл
@@ -0,0 +1,230 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "storage_parser.hpp"
#include "debug.hpp"
#include "trace_cache/sample_type.hpp"
#include <cstdio>
#include <fstream>
#include <sstream>
#include <string>
namespace rocprofsys
{
namespace trace_cache
{
storage_parser::storage_parser(pid_t _pid)
: m_pid(_pid)
{}
void
storage_parser::register_type_callback(
const entry_type& type,
const std::function<void(const storage_parsed_type_base&)>& callback)
{
m_callbacks[type].push_back(callback);
}
void
storage_parser::consume_storage()
{
ROCPROFSYS_DEBUG("Consuming buffered storage with filename: %s", filename.c_str());
if(m_pid != getpid())
{
ROCPROFSYS_DEBUG(
"Storage parser is not created in same process as shutting down..");
return;
}
std::ifstream ifs(filename, std::ios::binary);
if(!ifs)
{
std::stringstream ss;
ss << "Error opening file for reading: " << filename << "\n";
throw std::runtime_error(ss.str());
}
bool _parsing_needed = !m_callbacks.empty();
struct __attribute__((packed)) sample_header
{
entry_type type;
size_t sample_size;
};
sample_header header;
while(!ifs.eof() && _parsing_needed)
{
ifs.read(reinterpret_cast<char*>(&header), sizeof(header));
if(header.sample_size == 0 || ifs.eof())
{
continue;
}
std::vector<uint8_t> sample;
sample.reserve(header.sample_size);
ifs.read(reinterpret_cast<char*>(sample.data()), header.sample_size);
if(ifs.bad())
{
ROCPROFSYS_WARNING(
1,
"Bad read while consuming buffered storage. Filename: %s. Bytes read: %d",
filename.c_str(), static_cast<int>(ifs.tellg()));
continue;
}
switch(header.type)
{
case entry_type::kernel_dispatch:
{
kernel_dispatch_sample _kernel_dispatch_sample;
parse_data(sample.data(), _kernel_dispatch_sample.start_timestamp,
_kernel_dispatch_sample.end_timestamp,
_kernel_dispatch_sample.thread_id,
_kernel_dispatch_sample.agent_id_handle,
_kernel_dispatch_sample.kernel_id,
_kernel_dispatch_sample.dispatch_id,
_kernel_dispatch_sample.queue_id_handle,
_kernel_dispatch_sample.correlation_id_internal,
_kernel_dispatch_sample.correlation_id_ancestor,
_kernel_dispatch_sample.private_segment_size,
_kernel_dispatch_sample.group_segment_size,
_kernel_dispatch_sample.workgroup_size_x,
_kernel_dispatch_sample.workgroup_size_y,
_kernel_dispatch_sample.workgroup_size_z,
_kernel_dispatch_sample.grid_size_x,
_kernel_dispatch_sample.grid_size_y,
_kernel_dispatch_sample.grid_size_z,
_kernel_dispatch_sample.stream_handle);
invoke_callbacks(header.type, _kernel_dispatch_sample);
break;
}
case entry_type::memory_copy:
{
memory_copy_sample _memory_copy_sample;
parse_data(
sample.data(), _memory_copy_sample.start_timestamp,
_memory_copy_sample.end_timestamp, _memory_copy_sample.thread_id,
_memory_copy_sample.dst_agent_id_handle,
_memory_copy_sample.src_agent_id_handle, _memory_copy_sample.kind,
_memory_copy_sample.operation, _memory_copy_sample.bytes,
_memory_copy_sample.correlation_id_internal,
_memory_copy_sample.correlation_id_ancestor,
_memory_copy_sample.dst_address_value,
_memory_copy_sample.src_address_value,
_memory_copy_sample.stream_handle);
invoke_callbacks(header.type, _memory_copy_sample);
break;
}
#if(ROCPROFILER_VERSION >= 600)
case entry_type::memory_alloc:
{
memory_allocate_sample _memory_allocate_sample;
parse_data(sample.data(), _memory_allocate_sample.start_timestamp,
_memory_allocate_sample.end_timestamp,
_memory_allocate_sample.thread_id,
_memory_allocate_sample.agent_id_handle,
_memory_allocate_sample.kind,
_memory_allocate_sample.operation,
_memory_allocate_sample.allocation_size,
_memory_allocate_sample.correlation_id_internal,
_memory_allocate_sample.correlation_id_ancestor,
_memory_allocate_sample.address_value,
_memory_allocate_sample.stream_handle);
invoke_callbacks(header.type, _memory_allocate_sample);
break;
}
#endif
case entry_type::region:
{
region_sample _region_sample;
parse_data(sample.data(), _region_sample.thread_id, _region_sample.kind,
_region_sample.operation,
_region_sample.correlation_id_internal,
_region_sample.correlation_id_ancestor,
_region_sample.start_timestamp, _region_sample.end_timestamp,
_region_sample.call_stack, _region_sample.args_str,
_region_sample.category);
invoke_callbacks(header.type, _region_sample);
break;
}
case entry_type::in_time_sample:
{
in_time_sample _in_time_sample;
parse_data(sample.data(), _in_time_sample.track_name,
_in_time_sample.timestamp_ns, _in_time_sample.event_metadata,
_in_time_sample.stack_id, _in_time_sample.parent_stack_id,
_in_time_sample.correlation_id, _in_time_sample.call_stack,
_in_time_sample.line_info);
invoke_callbacks(header.type, _in_time_sample);
break;
}
case entry_type::pmc_event_with_sample:
{
pmc_event_with_sample _pmc_event_with_sample;
parse_data(
sample.data(), _pmc_event_with_sample.track_name,
_pmc_event_with_sample.timestamp_ns,
_pmc_event_with_sample.event_metadata,
_pmc_event_with_sample.stack_id,
_pmc_event_with_sample.parent_stack_id,
_pmc_event_with_sample.correlation_id,
_pmc_event_with_sample.call_stack, _pmc_event_with_sample.line_info,
_pmc_event_with_sample.agent_handle,
_pmc_event_with_sample.pmc_info_name, _pmc_event_with_sample.value);
invoke_callbacks(header.type, _pmc_event_with_sample);
break;
}
default: break;
}
}
ifs.close();
ROCPROFSYS_DEBUG("File parsing finished. Removing %s from file system",
filename.c_str());
std::remove(filename.c_str());
}
void
storage_parser::invoke_callbacks(entry_type type, const storage_parsed_type_base& parsed)
{
auto _callback_list = m_callbacks.find(type);
if(_callback_list == m_callbacks.end())
{
ROCPROFSYS_VERBOSE(1, "Callback not found for cache postprocessing");
return;
}
for(auto& cb : _callback_list->second)
{
cb(parsed);
}
}
} // namespace trace_cache
} // namespace rocprofsys
+83
Просмотреть файл
@@ -0,0 +1,83 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "buffer_storage.hpp"
#include "sample_type.hpp"
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <map>
#include <rocprofiler-systems/categories.h>
#include <stdint.h>
#include <string>
#include <type_traits>
#include <vector>
namespace rocprofsys
{
namespace trace_cache
{
using postprocessing_callback = std::function<void(const storage_parsed_type_base&)>;
class cache_manager;
class storage_parser
{
public:
void register_type_callback(const entry_type& type,
const postprocessing_callback& callback);
void consume_storage();
private:
friend class cache_manager;
storage_parser(pid_t _pid);
template <typename T>
static void process_arg(const uint8_t*& data_pos, T& arg)
{
if constexpr(std::is_same_v<T, std::string>)
{
arg = std::string((const char*) data_pos);
data_pos += arg.size() + 1;
}
else
{
arg = *reinterpret_cast<const T*>(data_pos);
data_pos += sizeof(T);
}
}
template <typename... Args>
static void parse_data(const uint8_t* data_pos, Args&... args)
{
(process_arg(data_pos, args), ...);
}
private:
pid_t m_pid;
void invoke_callbacks(entry_type type, const storage_parsed_type_base& parsed);
std::map<entry_type, std::vector<postprocessing_callback>> m_callbacks;
};
} // namespace trace_cache
} // namespace rocprofsys
+3 -13
Просмотреть файл
@@ -147,16 +147,6 @@ reset_rocprofsys_preload()
{
(void) get_rocprofsys_is_preloaded();
(void) get_rocprofsys_preload();
auto _modified_preload = std::string{};
for(const auto& itr : delimit(_preload_libs, ":"))
{
if(itr.find("librocprof-sys") != std::string::npos) continue;
_modified_preload += common::join("", ":", itr);
}
if(!_modified_preload.empty() && _modified_preload.find(':') == 0)
_modified_preload = _modified_preload.substr(1);
setenv("LD_PRELOAD", _modified_preload.c_str(), 1);
}
}
@@ -1251,9 +1241,9 @@ rocprofsys_preload()
verify_instrumented_preloaded();
static bool _once = false;
if(_once) return _preload;
_once = true;
static pid_t _once = 0;
if(_once == getpid()) return _preload;
_once = getpid();
if(_preload)
{
+1
Просмотреть файл
@@ -48,6 +48,7 @@ extern "C"
ROCPROFSYS_CATEGORY_ROCM_HSA_API,
ROCPROFSYS_CATEGORY_ROCM_KERNEL_DISPATCH,
ROCPROFSYS_CATEGORY_ROCM_MEMORY_COPY,
ROCPROFSYS_CATEGORY_ROCM_MEMORY_ALLOCATE,
ROCPROFSYS_CATEGORY_ROCM_SCRATCH_MEMORY,
ROCPROFSYS_CATEGORY_ROCM_HIP_STREAM,
ROCPROFSYS_CATEGORY_ROCM_PAGE_MIGRATION,
+20 -25
Просмотреть файл
@@ -44,6 +44,7 @@
#include "core/perfetto_fwd.hpp"
#include "core/rocpd/data_processor.hpp"
#include "core/timemory.hpp"
#include "core/trace_cache/cache_manager.hpp"
#include "core/utility.hpp"
#include "library/causal/data.hpp"
#include "library/causal/experiment.hpp"
@@ -328,37 +329,17 @@ read_command_line(pid_t _pid)
}
void
rocprofsys_preinit_rocpd()
rocprofsys_preinit_cache()
{
auto& _data_processor = rocpd::data_processor::get_instance();
const auto& _n_info = node_info::get_instance();
auto _cmd_line = read_command_line(getpid());
auto& _agent_manager = agent_manager::get_instance();
auto _cmd_line = read_command_line(getpid());
if(_cmd_line.empty())
{
_cmd_line.emplace_back("rocprofiler-systems");
}
_data_processor.insert_node_info(
_n_info.id, _n_info.hash, _n_info.machine_id.c_str(), _n_info.system_name.c_str(),
_n_info.node_name.c_str(), _n_info.release.c_str(), _n_info.version.c_str(),
_n_info.machine.c_str(), _n_info.domain_name.c_str());
_data_processor.insert_process_info(_n_info.id, getppid(), getpid(), 0, 0, 0, 0,
_cmd_line[0].c_str(), "{}");
const auto& agents = _agent_manager.get_agents();
for(const auto& rocpd_agent : agents)
{
auto _base_id = rocpd::data_processor::get_instance().insert_agent(
_n_info.id, getpid(),
((rocpd_agent->type == agent_type::GPU) ? "GPU" : "CPU"),
rocpd_agent->node_id, rocpd_agent->logical_node_id,
rocpd_agent->logical_node_type_id, rocpd_agent->id, rocpd_agent->name.c_str(),
rocpd_agent->model_name.c_str(), rocpd_agent->vendor_name.c_str(),
rocpd_agent->product_name.c_str(), "");
rocpd_agent->base_id = _base_id;
}
trace_cache::get_metadata_registry().set_process(
{ getpid(), getppid(), _cmd_line.at(0) });
}
void
@@ -534,7 +515,7 @@ rocprofsys_init_tooling_hidden(void)
#if !(ROCPROFSYS_USE_ROCM > 0)
rocprofsys_preinit_cpu_agents();
#endif
if(get_use_rocpd()) rocprofsys_preinit_rocpd();
rocprofsys_preinit_cache();
if(get_use_process_sampling())
{
@@ -776,10 +757,16 @@ rocprofsys_finalize_hidden(void)
rocprofiler_sdk::shutdown();
}
#endif
auto& _manager = rocprofsys::trace_cache::cache_manager::get_instance();
_manager.shutdown();
_manager.post_process();
#if ROCPROFSYS_USE_ROCM > 0
if(get_use_rocpd())
{
rocpd::data_processor::get_instance().flush();
}
#endif
set_state(State::Finalized);
std::quick_exit(EXIT_SUCCESS);
return;
@@ -880,6 +867,12 @@ rocprofsys_finalize_hidden(void)
}
#endif
{
auto& _manager = rocprofsys::trace_cache::cache_manager::get_instance();
_manager.shutdown();
_manager.post_process();
}
ROCPROFSYS_DEBUG_F("Stopping and destroying instrumentation bundles...\n");
for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i)
{
@@ -1070,10 +1063,12 @@ rocprofsys_finalize_hidden(void)
[](int) {});
common::destroy_static_objects();
#if ROCPROFSYS_USE_ROCM > 0
if(get_use_rocpd())
{
rocpd::data_processor::get_instance().flush();
}
#endif
}
//======================================================================================//
+53 -52
Просмотреть файл
@@ -27,6 +27,8 @@
// THE SOFTWARE.
#include "core/agent.hpp"
#include "core/trace_cache/cache_manager.hpp"
#include "core/trace_cache/cache_utility.hpp"
#if defined(NDEBUG)
# undef NDEBUG
#endif
@@ -41,6 +43,7 @@
#include "core/perfetto.hpp"
#include "core/rocpd/data_processor.hpp"
#include "core/state.hpp"
#include "core/trace_cache/metadata_registry.hpp"
#include "library/amd_smi.hpp"
#include "library/runtime.hpp"
#include "library/thread_info.hpp"
@@ -85,34 +88,31 @@ get_data_processor()
}
void
rocpd_initialize_category()
metadata_initialize_category()
{
get_data_processor().insert_category(ROCPROFSYS_CATEGORY_AMD_SMI,
trait::name<category::amd_smi>::value);
trace_cache::get_metadata_registry().add_string(
trait::name<category::amd_smi>::value);
}
void
rocpd_initialize_smi_tracks()
metadata_initialize_smi_tracks()
{
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
const auto thread_id = std::nullopt; // Internal thread ID for amd-smi
const auto thread_id = std::nullopt;
data_processor.insert_track(trait::name<category::amd_smi_mm_busy>::value, n_info.id,
getpid(), thread_id);
data_processor.insert_track(trait::name<category::amd_smi_power>::value, n_info.id,
getpid(), thread_id);
data_processor.insert_track(trait::name<category::amd_smi_temp>::value, n_info.id,
getpid(), thread_id);
data_processor.insert_track(trait::name<category::amd_smi_memory_usage>::value,
n_info.id, getpid(), thread_id);
trace_cache::get_metadata_registry().add_track(
{ trait::name<category::amd_smi_mm_busy>::value, thread_id, "{}" });
trace_cache::get_metadata_registry().add_track(
{ trait::name<category::amd_smi_power>::value, thread_id, "{}" });
trace_cache::get_metadata_registry().add_track(
{ trait::name<category::amd_smi_temp>::value, thread_id, "{}" });
trace_cache::get_metadata_registry().add_track(
{ trait::name<category::amd_smi_memory_usage>::value, thread_id, "{}" });
}
void
rocpd_initialize_smi_pmc(size_t gpu_id)
metadata_initialize_smi_pmc(size_t gpu_id)
{
auto& data_processor = get_data_processor();
// find the proper values for a following definitions
// TODO: Find the proper values for a following definitions
size_t EVENT_CODE = 0;
size_t INSTANCE_ID = 0;
const char* LONG_DESCRIPTION = "";
@@ -121,34 +121,33 @@ rocpd_initialize_smi_pmc(size_t gpu_id)
const char* EXPRESSION = "";
const char* CELSIUS_DEGREES = "\u00B0C";
auto ni = node_info::get_instance();
const auto* TARGET_ARCH = "GPU";
const char* TARGET_ARCH = "GPU";
auto& _agent_manager = agent_manager::get_instance();
auto base_id = _agent_manager.get_agent_by_id(gpu_id, agent_type::GPU).base_id;
trace_cache::get_metadata_registry().add_pmc_info(
{ agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::amd_smi_mm_busy>::value, "Busy",
trait::name<category::amd_smi_mm_busy>::description, LONG_DESCRIPTION,
COMPONENT, trace_cache::PERCENTAGE, rocprofsys::trace_cache::ABSOLUTE, BLOCK,
EXPRESSION, 0, 0, "{}" });
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::amd_smi_mm_busy>::value, "Busy",
trait::name<category::amd_smi_mm_busy>::description, LONG_DESCRIPTION, COMPONENT,
"%", "ABS", BLOCK, EXPRESSION, 0, 0);
trace_cache::get_metadata_registry().add_pmc_info(
{ agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::amd_smi_temp>::value, "Temp",
trait::name<category::amd_smi_temp>::description, LONG_DESCRIPTION, COMPONENT,
CELSIUS_DEGREES, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::amd_smi_temp>::value, "Temp",
trait::name<category::amd_smi_temp>::description, LONG_DESCRIPTION, COMPONENT,
CELSIUS_DEGREES, "ABS", BLOCK, EXPRESSION, 0, 0);
trace_cache::get_metadata_registry().add_pmc_info(
{ agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::amd_smi_power>::value, "Pow",
trait::name<category::amd_smi_power>::description, LONG_DESCRIPTION, COMPONENT,
"W", rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::amd_smi_power>::value, "Pow",
trait::name<category::amd_smi_power>::description, LONG_DESCRIPTION, COMPONENT,
"w", "ABS", BLOCK, EXPRESSION, 0, 0);
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::amd_smi_memory_usage>::value, "MemUsg",
trait::name<category::amd_smi_memory_usage>::description, LONG_DESCRIPTION,
COMPONENT, "MB", "ABS", BLOCK, EXPRESSION, 0, 0);
trace_cache::get_metadata_registry().add_pmc_info(
{ agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::amd_smi_memory_usage>::value, "MemUsg",
trait::name<category::amd_smi_memory_usage>::description, LONG_DESCRIPTION,
COMPONENT, tim::units::mem_repr(tim::units::megabyte),
rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
}
void
@@ -159,10 +158,14 @@ rocpd_process_smi_pmc_events(const uint32_t device_id, const amd_smi::settings&
if(!(settings.busy || settings.temp || settings.power || settings.mem_usage)) return;
auto& data_processor = get_data_processor();
auto event_id = data_processor.insert_event(ROCPROFSYS_CATEGORY_AMD_SMI, 0, 0, 0);
const auto* _name = trait::name<category::amd_smi>::value;
auto name_primary_key = data_processor.insert_string(_name);
auto event_id = data_processor.insert_event(name_primary_key, 0, 0, 0);
auto& _agent_manager = agent_manager::get_instance();
auto base_id = _agent_manager.get_agent_by_id(device_id, agent_type::GPU).base_id;
auto base_id =
_agent_manager.get_agent_by_type_index(device_id, agent_type::GPU).base_id;
auto insert_event_and_sample = [&](bool enabled, const char* name, double value) {
if(!enabled) return;
@@ -392,10 +395,12 @@ config()
for(auto itr : data::device_list)
data::get_initial().at(itr).sample(itr);
if(get_use_rocpd())
metadata_initialize_category();
metadata_initialize_smi_tracks();
for(const auto& _dev_id : data::device_list)
{
rocpd_initialize_category();
rocpd_initialize_smi_tracks();
metadata_initialize_smi_pmc(_dev_id);
}
}
@@ -483,11 +488,6 @@ data::post_process(uint32_t _dev_id)
auto use_perfetto = get_use_perfetto();
auto use_rocpd = get_use_rocpd();
if(use_rocpd)
{
rocpd_initialize_smi_pmc(_dev_id);
}
for(auto& itr : _amd_smi)
{
using counter_track = perfetto_counter_track<data>;
@@ -788,6 +788,7 @@ setup()
is_initialized() = true;
data::setup();
} catch(std::runtime_error& _e)
{
ROCPROFSYS_VERBOSE(0, "Exception thrown when initializing amd-smi: %s\n",
+139 -141
Просмотреть файл
@@ -23,6 +23,7 @@
#include "library/components/backtrace_metrics.hpp"
#include "core/agent.hpp"
#include "core/agent_manager.hpp"
#include "core/common.hpp"
#include "core/components/fwd.hpp"
#include "core/config.hpp"
#include "core/debug.hpp"
@@ -167,7 +168,141 @@ get_enabled(tim::type_list<Tp...>)
(_v.set(_n++, trait::runtime_enabled<Tp>::get()), ...);
return _v;
}
void
rocpd_init_categories()
{
static bool _is_initialized = false;
if(_is_initialized) return;
auto& data_processor = get_data_processor();
data_processor.insert_string(trait::name<category::thread_cpu_time>::value);
data_processor.insert_string(trait::name<category::thread_peak_memory>::value);
data_processor.insert_string(trait::name<category::thread_context_switch>::value);
data_processor.insert_string(trait::name<category::thread_page_fault>::value);
data_processor.insert_string(trait::name<category::thread_hardware_counter>::value);
_is_initialized = true;
}
template <typename Category>
void
rocpd_init_tracks(int64_t _tid)
{
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
const auto& t_info = thread_info::get(_tid, SequentTID);
auto _tid_name = JOIN("", '[', _tid, ']');
auto thread_idx = data_processor.insert_thread_info(
n_info.id, getppid(), getpid(), t_info->index_data->system_value,
JOIN(" ", "Thread", _tid).c_str(), t_info->get_start(), t_info->get_stop(), "{}");
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
{
// Initialize hw_counter_tracks and create one track for each hardware counter
auto _hw_cnt_labels = *get_papi_labels(_tid);
for(auto& itr : _hw_cnt_labels)
{
std::string _desc = tim::papi::get_event_info(itr).short_descr;
if(_desc.empty()) _desc = itr;
ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str());
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
data_processor.insert_track(track_name.c_str(), n_info.id, getpid(),
thread_idx, "{}");
}
}
else
data_processor.insert_track(
JOIN('_', trait::name<Category>::value, _tid_name).c_str(), n_info.id,
getpid(), thread_idx, "{}");
}
template <typename Category>
void
rocpd_initialize_backtrace_metrics_pmc(size_t dev_id, const char* units, int64_t _tid)
{
auto& data_processor = get_data_processor();
auto _tid_name = JOIN("", '[', _tid, ']');
size_t EVENT_CODE = 0;
size_t INSTANCE_ID = 0;
const char* LONG_DESCRIPTION = "";
const char* COMPONENT = "";
const char* BLOCK = "";
const char* EXPRESSION = "";
auto ni = node_info::get_instance();
const auto TARGET_ARCH = "CPU";
auto& agent_mngr = agent_manager::get_instance();
auto base_id = agent_mngr.get_agent_by_id(dev_id, agent_type::CPU).base_id;
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
{
auto _hw_cnt_labels = *get_papi_labels(_tid);
for(auto& itr : _hw_cnt_labels)
{
std::string _desc = tim::papi::get_event_info(itr).short_descr;
if(_desc.empty()) _desc = itr;
ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str());
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
track_name.c_str(), trait::name<Category>::value,
trait::name<Category>::description, LONG_DESCRIPTION, COMPONENT, units,
"ABS", BLOCK, EXPRESSION, 0, 0);
}
}
else
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
JOIN("_", trait::name<Category>::value, _tid_name).c_str(),
trait::name<Category>::value, trait::name<Category>::description,
LONG_DESCRIPTION, COMPONENT, units, "ABS", BLOCK, EXPRESSION, 0, 0);
}
template <typename Category, typename Value>
void
rocpd_process_backtrace_metrics_events(const uint32_t device_id, uint64_t timestamp,
Value value, int64_t _tid)
{
auto& data_processor = get_data_processor();
auto _tid_name = JOIN("", '[', _tid, ']');
auto string_primary_key = data_processor.insert_string(trait::name<Category>::value);
auto event_id = data_processor.insert_event(string_primary_key, 0, 0, 0);
auto& agent_mngr = agent_manager::get_instance();
auto base_id = agent_mngr.get_agent_by_id(device_id, agent_type::CPU).base_id;
auto insert_event_and_sample = [&](const char* _name, double _value) {
data_processor.insert_pmc_event(event_id, base_id, _name, _value);
data_processor.insert_sample(_name, timestamp, event_id);
};
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
{
auto _hw_cnt_labels = *get_papi_labels(_tid);
const auto& hw_counters =
static_cast<backtrace_metrics::hw_counter_data_t>(value);
for(size_t i = 0; i < _hw_cnt_labels.size() && i < hw_counters.size(); ++i)
{
std::string _desc = tim::papi::get_event_info(_hw_cnt_labels[i]).short_descr;
if(_desc.empty()) _desc = _hw_cnt_labels[i];
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
insert_event_and_sample(track_name.c_str(), hw_counters.at(i));
}
}
else
insert_event_and_sample(
JOIN("_", trait::name<Category>::value, _tid_name).c_str(), value);
}
} // namespace
void
backtrace_metrics::sample(int)
{
@@ -337,146 +472,6 @@ backtrace_metrics::fini_perfetto(int64_t _tid, valid_array_t _valid)
}
}
void
rocpd_init_categories()
{
static bool _is_initialized = false;
if(_is_initialized) return;
get_data_processor().insert_category(
category_enum_id<category::thread_cpu_time>::value,
trait::name<category::thread_cpu_time>::value);
get_data_processor().insert_category(
category_enum_id<category::thread_peak_memory>::value,
trait::name<category::thread_peak_memory>::value);
get_data_processor().insert_category(
category_enum_id<category::thread_context_switch>::value,
trait::name<category::thread_context_switch>::value);
get_data_processor().insert_category(
category_enum_id<category::thread_page_fault>::value,
trait::name<category::thread_page_fault>::value);
get_data_processor().insert_category(
category_enum_id<category::thread_hardware_counter>::value,
trait::name<category::thread_hardware_counter>::value);
_is_initialized = true;
}
template <typename Category>
void
rocpd_init_tracks(int64_t _tid)
{
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
const auto& t_info = thread_info::get(_tid, SequentTID);
auto _tid_name = JOIN("", '[', _tid, ']');
auto thread_idx = data_processor.insert_thread_info(
n_info.id, getppid(), getpid(), t_info->index_data->system_value,
JOIN(" ", "Thread", _tid).c_str(), t_info->get_start(), t_info->get_stop(), "{}");
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
{
// Initialize hw_counter_tracks and create one track for each hardware counter
auto _hw_cnt_labels = *get_papi_labels(_tid);
for(auto& itr : _hw_cnt_labels)
{
std::string _desc = tim::papi::get_event_info(itr).short_descr;
if(_desc.empty()) _desc = itr;
ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str());
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
data_processor.insert_track(track_name.c_str(), n_info.id, getpid(),
thread_idx, "{}");
}
}
else
data_processor.insert_track(
JOIN('_', trait::name<Category>::value, _tid_name).c_str(), n_info.id,
getpid(), thread_idx, "{}");
}
template <typename Category>
void
rocpd_initialize_backtrace_metrics_pmc(size_t dev_id, const char* units, int64_t _tid)
{
auto& data_processor = get_data_processor();
auto _tid_name = JOIN("", '[', _tid, ']');
size_t EVENT_CODE = 0;
size_t INSTANCE_ID = 0;
const char* LONG_DESCRIPTION = "";
const char* COMPONENT = "";
const char* BLOCK = "";
const char* EXPRESSION = "";
auto ni = node_info::get_instance();
const auto* TARGET_ARCH = "CPU";
auto& _agent_manager = agent_manager::get_instance();
auto _base_id = _agent_manager.get_agent_by_id(dev_id, agent_type::CPU).base_id;
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
{
auto _hw_cnt_labels = *get_papi_labels(_tid);
for(auto& itr : _hw_cnt_labels)
{
std::string _desc = tim::papi::get_event_info(itr).short_descr;
if(_desc.empty()) _desc = itr;
ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str());
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
data_processor.insert_pmc_description(
ni.id, getpid(), _base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
track_name.c_str(), trait::name<Category>::value,
trait::name<Category>::description, LONG_DESCRIPTION, COMPONENT, units,
"ABS", BLOCK, EXPRESSION, 0, 0);
}
}
else
data_processor.insert_pmc_description(
ni.id, getpid(), _base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
JOIN("_", trait::name<Category>::value, _tid_name).c_str(),
trait::name<Category>::value, trait::name<Category>::description,
LONG_DESCRIPTION, COMPONENT, units, "ABS", BLOCK, EXPRESSION, 0, 0);
}
template <typename Category, typename Value>
void
rocpd_process_backtrace_metrics_events(const uint32_t device_id, uint64_t timestamp,
Value value, int64_t _tid)
{
auto& data_processor = get_data_processor();
auto _tid_name = JOIN("", '[', _tid, ']');
auto event_id =
data_processor.insert_event(category_enum_id<Category>::value, 0, 0, 0);
auto& agent_mngr = agent_manager::get_instance();
auto base_id = agent_mngr.get_agent_by_id(device_id, agent_type::CPU).base_id;
auto insert_event_and_sample = [&](const char* name, double _value) {
data_processor.insert_pmc_event(event_id, base_id, name, _value);
data_processor.insert_sample(name, timestamp, event_id);
};
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
{
auto _hw_cnt_labels = *get_papi_labels(_tid);
const auto& _hw_counters =
static_cast<backtrace_metrics::hw_counter_data_t>(value);
for(size_t i = 0; i < _hw_cnt_labels.size() && i < _hw_counters.size(); ++i)
{
std::string _desc = tim::papi::get_event_info(_hw_cnt_labels[i]).short_descr;
if(_desc.empty()) _desc = _hw_cnt_labels[i];
std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)");
insert_event_and_sample(track_name.c_str(), _hw_counters.at(i));
}
}
else
insert_event_and_sample(
JOIN("_", trait::name<Category>::value, _tid_name).c_str(), value);
}
void
backtrace_metrics::init_rocpd(int64_t _tid, valid_array_t _valid)
{
@@ -641,8 +636,10 @@ backtrace_metrics::post_process_perfetto(int64_t _tid, uint64_t _ts) const
}
void
backtrace_metrics::post_process_rocpd(int64_t _tid, uint64_t _ts) const
backtrace_metrics::post_process_rocpd([[maybe_unused]] int64_t _tid,
[[maybe_unused]] uint64_t _ts) const
{
#if ROCPROFSYS_USE_ROCM > 0
auto is_category_enabled = [&](const auto& _category) { return (*this)(_category); };
if(is_category_enabled(category::thread_cpu_time{}))
@@ -675,6 +672,7 @@ backtrace_metrics::post_process_rocpd(int64_t _tid, uint64_t _ts) const
hw_counter_data_t>(0, _ts, m_hw_counter,
_tid);
}
#endif
}
} // namespace component
} // namespace rocprofsys
+57 -101
Просмотреть файл
@@ -26,7 +26,8 @@
#include "core/config.hpp"
#include "core/node_info.hpp"
#include "core/perfetto.hpp"
#include "core/rocpd/data_processor.hpp"
#include "core/trace_cache/cache_manager.hpp"
#include "core/trace_cache/sample_type.hpp"
#include "library/tracing.hpp"
#include <timemory/units.hpp>
@@ -76,41 +77,25 @@ write_perfetto_counter_track(uint64_t _val)
namespace
{
rocpd::data_processor&
get_data_processor()
{
return rocpd::data_processor::get_instance();
}
void
rocpd_initialize_comm_data_categories()
metadata_initialize_comm_data_categories()
{
static bool _is_initialized = false;
if(_is_initialized) return;
get_data_processor().insert_category(category_enum_id<category::comm_data>::value,
trait::name<category::comm_data>::value);
#if defined(ROCPROFSYS_USE_MPI)
get_data_processor().insert_category(category_enum_id<category::mpi>::value,
trait::name<category::mpi>::value);
#endif
#if defined(ROCPROFSYS_USE_RCCL)
get_data_processor().insert_category(category_enum_id<category::rocm_rccl>::value,
trait::name<category::rocm_rccl>::value);
#endif
trace_cache::get_metadata_registry().add_string(
trait::name<category::comm_data>::value);
trace_cache::get_metadata_registry().add_string(trait::name<category::mpi>::value);
_is_initialized = true;
}
template <typename Track>
void
rocpd_initialize_track()
metadata_initialize_track()
{
auto& n_info = node_info::get_instance();
auto thread_id = std::nullopt;
auto _init_track = [&](const char* label) {
ROCPROFSYS_VERBOSE(3, "INSERT_TRACK label: %s, node ID: %d, Process ID: %d",
label, n_info.id, getpid());
get_data_processor().insert_track(label, n_info.id, getpid(), thread_id);
auto _init_track = [&](const char* label) {
trace_cache::get_metadata_registry().add_track({ label, std::nullopt, "{}" });
};
static std::once_flag _once{};
@@ -118,9 +103,8 @@ rocpd_initialize_track()
}
void
rocpd_initialize_comm_data_pmc()
metadata_initialize_comm_data_pmc()
{
[[maybe_unused]] auto& data_processor = get_data_processor();
// find the proper values for a following definitions
[[maybe_unused]] size_t EVENT_CODE = 0;
[[maybe_unused]] size_t INSTANCE_ID = 0;
@@ -131,56 +115,28 @@ rocpd_initialize_comm_data_pmc()
[[maybe_unused]] constexpr const char* MSG = "bytes";
[[maybe_unused]] constexpr const auto* TARGET_ARCH = "CPU";
auto ni = node_info::get_instance();
constexpr const auto DEVICE_ID = 0; // Assuming CPU device ID is 0
auto& _agent_manager = agent_manager::get_instance();
[[maybe_unused]] auto base_id =
_agent_manager.get_agent_by_id(DEVICE_ID, agent_type::CPU).base_id;
[[maybe_unused]] constexpr const auto DEVICE_ID = 0; // Assuming CPU device ID is 0
#if defined(ROCPROFSYS_USE_MPI)
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
comm_data::mpi_send::label, "Tracks MPI Send communication data sizes",
trait::name<category::mpi>::description, LONG_DESCRIPTION, COMPONENT, MSG, "ABS",
BLOCK, EXPRESSION, 0, 0);
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
comm_data::mpi_recv::label, "Tracks MPI Receive communication data sizes",
trait::name<category::mpi>::description, LONG_DESCRIPTION, COMPONENT, MSG, "ABS",
BLOCK, EXPRESSION, 0, 0);
#endif
#if defined(ROCPROFSYS_USE_RCCL)
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, rccl_send::label,
"Tracks RCCL Send communication data sizes",
trait::name<category::rocm_rccl>::description, LONG_DESCRIPTION, COMPONENT, MSG,
"ABS", BLOCK, EXPRESSION, 0, 0);
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, rccl_recv::label,
"Tracks RCCL Receive communication data sizes",
trait::name<category::rocm_rccl>::description, LONG_DESCRIPTION, COMPONENT, MSG,
"ABS", BLOCK, EXPRESSION, 0, 0);
trace_cache::get_metadata_registry().add_pmc_info(
{ agent_type::CPU, DEVICE_ID, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
comm_data::mpi_send::label, "Tracks MPI communication data sizes",
trait::name<category::mpi>::description, LONG_DESCRIPTION, COMPONENT, MSG,
rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
trace_cache::get_metadata_registry().add_pmc_info(
{ agent_type::CPU, DEVICE_ID, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
comm_data::mpi_recv::label, "Tracks MPI communication data sizes",
trait::name<category::mpi>::description, LONG_DESCRIPTION, COMPONENT, MSG,
rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
#endif
}
template <typename Track>
void
rocpd_process_cpu_usage_events(const uint32_t device_id, int bytes)
cache_comm_data_events(const uint32_t device_id, int bytes)
{
auto& data_processor = get_data_processor();
auto event_id = data_processor.insert_event(
category_enum_id<category::comm_data>::value, 0, 0, 0);
auto& agents = agent_manager::get_instance();
auto agent = agents.get_agent_by_id(device_id, agent_type::CPU);
auto insert_event_and_sample = [&](const char* name, uint64_t timestamp,
double value) {
data_processor.insert_pmc_event(event_id, agent.device_id, name, value);
data_processor.insert_sample(name, timestamp, event_id);
};
auto agent = agents.get_agent_by_type_index(device_id, agent_type::CPU);
static std::mutex _mutex{};
static uint64_t value = 0;
@@ -190,8 +146,20 @@ rocpd_process_cpu_usage_events(const uint32_t device_id, int bytes)
_now = rocprofsys::tracing::now<uint64_t>();
bytes = (value += bytes);
}
const std::string track_name = Track::label;
const size_t timestamp_ns = _now;
const std::string event_metadata = "{}";
const size_t stack_id = 0;
const size_t parent_stack_id = 0;
const size_t correlation_id = 0;
const std::string call_stack = "{}";
const std::string line_info = "{}";
const size_t agent_handle = agent.handle;
insert_event_and_sample(Track::label, _now, bytes);
trace_cache::get_buffer_storage().store(
trace_cache::entry_type::pmc_event_with_sample, track_name.c_str(), timestamp_ns,
event_metadata.c_str(), stack_id, parent_stack_id, correlation_id,
call_stack.c_str(), line_info.c_str(), agent_handle, track_name.c_str(), value);
}
} // namespace
@@ -199,10 +167,14 @@ rocpd_process_cpu_usage_events(const uint32_t device_id, int bytes)
void
comm_data::start()
{
if(get_use_rocpd())
{
rocpd_initialize_comm_data_categories();
rocpd_initialize_comm_data_pmc();
metadata_initialize_comm_data_categories();
metadata_initialize_comm_data_pmc();
#if defined(ROCPROFSYS_USE_MPI)
metadata_initialize_track<mpi_send>();
metadata_initialize_track<mpi_recv>();
#endif
}
}
@@ -248,10 +220,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int cou
write_perfetto_counter_track<mpi_send>(count * _size);
if(get_use_rocpd())
{
rocpd_initialize_track<mpi_send>();
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
cache_comm_data_events<mpi_send>(0, count * _size);
}
if(rocprofsys::get_use_timemory())
@@ -276,10 +246,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count,
if(get_use_perfetto()) write_perfetto_counter_track<mpi_recv>(count * _size);
if(get_use_rocpd())
{
rocpd_initialize_track<mpi_recv>();
rocpd_process_cpu_usage_events<mpi_recv>(0, count * _size);
cache_comm_data_events<mpi_recv>(0, count * _size);
}
if(rocprofsys::get_use_timemory())
@@ -304,10 +272,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int cou
if(get_use_perfetto()) write_perfetto_counter_track<mpi_send>(count * _size);
if(get_use_rocpd())
{
rocpd_initialize_track<mpi_send>();
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
cache_comm_data_events<mpi_send>(0, count * _size);
}
if(rocprofsys::get_use_timemory())
@@ -332,10 +298,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count,
if(get_use_perfetto()) write_perfetto_counter_track<mpi_recv>(count * _size);
if(get_use_rocpd())
{
rocpd_initialize_track<mpi_recv>();
rocpd_process_cpu_usage_events<mpi_recv>(0, count * _size);
cache_comm_data_events<mpi_recv>(0, count * _size);
}
if(rocprofsys::get_use_timemory())
@@ -360,10 +324,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count,
if(get_use_perfetto()) write_perfetto_counter_track<mpi_send>(count * _size);
if(get_use_rocpd())
{
rocpd_initialize_track<mpi_send>();
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
cache_comm_data_events<mpi_send>(0, count * _size);
}
if(rocprofsys::get_use_timemory())
@@ -389,12 +351,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, void*,
write_perfetto_counter_track<mpi_send>(count * _size);
}
if(get_use_rocpd())
{
rocpd_initialize_track<mpi_send>();
rocpd_initialize_track<mpi_recv>();
rocpd_process_cpu_usage_events<mpi_recv>(0, count * _size);
rocpd_process_cpu_usage_events<mpi_send>(0, count * _size);
cache_comm_data_events<mpi_recv>(0, count * _size);
cache_comm_data_events<mpi_send>(0, count * _size);
}
if(rocprofsys::get_use_timemory()) add(_data, count * _size);
@@ -416,10 +375,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
}
if(get_use_rocpd())
{
rocpd_process_cpu_usage_events<mpi_send>(0, sendcount * _send_size);
rocpd_process_cpu_usage_events<mpi_recv>(0, recvcount * _send_size);
cache_comm_data_events<mpi_send>(0, sendcount * _send_size);
cache_comm_data_events<mpi_recv>(0, recvcount * _send_size);
}
if(rocprofsys::get_use_timemory())
@@ -467,10 +425,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
}
if(get_use_rocpd())
{
rocpd_process_cpu_usage_events<mpi_send>(0, sendcount * _send_size);
rocpd_process_cpu_usage_events<mpi_recv>(0, recvcount * _send_size);
cache_comm_data_events<mpi_send>(0, sendcount * _send_size);
cache_comm_data_events<mpi_recv>(0, recvcount * _send_size);
}
if(rocprofsys::get_use_timemory())
@@ -501,10 +458,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen
write_perfetto_counter_track<mpi_recv>(recvcount * _recv_size);
}
if(get_use_rocpd())
{
rocpd_process_cpu_usage_events<mpi_send>(0, sendcount * _send_size);
rocpd_process_cpu_usage_events<mpi_recv>(0, recvcount * _recv_size);
cache_comm_data_events<mpi_send>(0, sendcount * _send_size);
cache_comm_data_events<mpi_recv>(0, recvcount * _recv_size);
}
if(rocprofsys::get_use_timemory())
+87 -98
Просмотреть файл
@@ -29,6 +29,8 @@
#include "core/node_info.hpp"
#include "core/perfetto.hpp"
#include "core/rocpd/data_processor.hpp"
#include "core/timemory.hpp"
#include "core/trace_cache/cache_manager.hpp"
#include "library/components/cpu_freq.hpp"
#include "library/thread_info.hpp"
@@ -91,54 +93,45 @@ get_data_processor()
}
void
rocpd_initialize_cpu_freq_category()
metadata_initialize_cpu_freq_category()
{
get_data_processor().insert_category(ROCPROFSYS_CATEGORY_CPU_FREQ,
trait::name<category::cpu_freq>::value);
trace_cache::get_metadata_registry().add_string(
trait::name<category::cpu_freq>::value);
}
void
rocpd_initialize_cpu_freq_tracks()
metadata_initialize_cpu_freq_tracks()
{
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
const auto thread_idx = std::nullopt; // Internal thread ID for cpu-freq
do_for_enabled_cpus([&](size_t cpu_id) {
data_processor.insert_track(
get_cpu_freq_track_name<category::cpu_freq>(cpu_id).c_str(), n_info.id,
getpid(), thread_idx);
trace_cache::get_metadata_registry().add_track(
{ get_cpu_freq_track_name<category::cpu_freq>(cpu_id).c_str(), std::nullopt,
"{}" });
});
}
void
rocpd_initialize_cpu_usage_tracks()
metadata_initialize_cpu_usage_tracks()
{
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
const auto thread_idx = std::nullopt; // Internal thread ID for cpu-freq
data_processor.insert_track(trait::name<category::process_page>::value, n_info.id,
getpid(), thread_idx);
data_processor.insert_track(trait::name<category::process_virt>::value, n_info.id,
getpid(), thread_idx);
data_processor.insert_track(trait::name<category::process_peak>::value, n_info.id,
getpid(), thread_idx);
data_processor.insert_track(trait::name<category::process_context_switch>::value,
n_info.id, getpid(), thread_idx);
data_processor.insert_track(trait::name<category::process_page_fault>::value,
n_info.id, getpid(), thread_idx);
data_processor.insert_track(trait::name<category::process_user_mode_time>::value,
n_info.id, getpid(), thread_idx);
data_processor.insert_track(trait::name<category::process_kernel_mode_time>::value,
n_info.id, getpid(), thread_idx);
trace_cache::get_metadata_registry().add_track(
{ trait::name<category::process_page>::value, std::nullopt, "{}" });
trace_cache::get_metadata_registry().add_track(
{ trait::name<category::process_virt>::value, std::nullopt, "{}" });
trace_cache::get_metadata_registry().add_track(
{ trait::name<category::process_peak>::value, std::nullopt, "{}" });
trace_cache::get_metadata_registry().add_track(
{ trait::name<category::process_context_switch>::value, std::nullopt, "{}" });
trace_cache::get_metadata_registry().add_track(
{ trait::name<category::process_page_fault>::value, std::nullopt, "{}" });
trace_cache::get_metadata_registry().add_track(
{ trait::name<category::process_user_mode_time>::value, std::nullopt, "{}" });
trace_cache::get_metadata_registry().add_track(
{ trait::name<category::process_kernel_mode_time>::value, std::nullopt, "{}" });
}
void
rocpd_initialize_cpu_freq_pmc(size_t dev_id)
metadata_initialize_cpu_freq_pmc(size_t dev_id)
{
auto& data_processor = get_data_processor();
// find the proper values for a following definitions
// TODO: Find the proper values for a following definitions
size_t EVENT_CODE = 0;
size_t INSTANCE_ID = 0;
const char* LONG_DESCRIPTION = "";
@@ -150,58 +143,56 @@ rocpd_initialize_cpu_freq_pmc(size_t dev_id)
auto ni = node_info::get_instance();
const auto* TARGET_ARCH = "CPU";
auto& _agent_manager = agent_manager::get_instance();
auto base_id = _agent_manager.get_agent_by_id(dev_id, agent_type::CPU).base_id;
do_for_enabled_cpus([&](size_t cpu_id) {
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
get_cpu_freq_track_name<category::cpu_freq>(cpu_id).c_str(), "Frequency",
trait::name<category::cpu_freq>::description, LONG_DESCRIPTION, COMPONENT,
component::cpu_freq::display_unit().c_str(), "ABS", BLOCK, EXPRESSION, 0, 0);
trace_cache::get_metadata_registry().add_pmc_info(
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
get_cpu_freq_track_name<category::cpu_freq>(cpu_id).c_str(), "Frequency",
trait::name<category::cpu_freq>::description, LONG_DESCRIPTION, COMPONENT,
component::cpu_freq::display_unit().c_str(),
rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
});
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_page>::value, "Memory Usage",
trait::name<category::process_page>::description, LONG_DESCRIPTION, COMPONENT,
MEMORY, "ABS", BLOCK, EXPRESSION, 0, 0);
trace_cache::get_metadata_registry().add_pmc_info(
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_page>::value, "Memory Usage",
trait::name<category::process_page>::description, LONG_DESCRIPTION, COMPONENT,
MEMORY, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_virt>::value, "Virtual Memory Usage",
trait::name<category::process_virt>::description, LONG_DESCRIPTION, COMPONENT,
MEMORY, "ABS", BLOCK, EXPRESSION, 0, 0);
trace_cache::get_metadata_registry().add_pmc_info(
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_virt>::value, "Virtual Memory Usage",
trait::name<category::process_virt>::description, LONG_DESCRIPTION, COMPONENT,
MEMORY, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_peak>::value, "Peak Memory",
trait::name<category::process_peak>::description, LONG_DESCRIPTION, COMPONENT,
MEMORY, "ABS", BLOCK, EXPRESSION, 0, 0);
trace_cache::get_metadata_registry().add_pmc_info(
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_peak>::value, "Peak Memory",
trait::name<category::process_peak>::description, LONG_DESCRIPTION, COMPONENT,
MEMORY, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_context_switch>::value, "Context Switches",
trait::name<category::process_context_switch>::description, LONG_DESCRIPTION,
COMPONENT, "", "ABS", BLOCK, EXPRESSION, 0, 0);
trace_cache::get_metadata_registry().add_pmc_info(
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_context_switch>::value, "Context Switches",
trait::name<category::process_context_switch>::description, LONG_DESCRIPTION,
COMPONENT, "", rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_page_fault>::value, "Page Faults",
trait::name<category::process_page_fault>::description, LONG_DESCRIPTION,
COMPONENT, "", "ABS", BLOCK, EXPRESSION, 0, 0);
trace_cache::get_metadata_registry().add_pmc_info(
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_page_fault>::value, "Page Faults",
trait::name<category::process_page_fault>::description, LONG_DESCRIPTION,
COMPONENT, "", rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_user_mode_time>::value, "User Time",
trait::name<category::process_user_mode_time>::description, LONG_DESCRIPTION,
COMPONENT, TIME, "ABS", BLOCK, EXPRESSION, 0, 0);
trace_cache::get_metadata_registry().add_pmc_info(
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_user_mode_time>::value, "User Time",
trait::name<category::process_user_mode_time>::description, LONG_DESCRIPTION,
COMPONENT, TIME, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
data_processor.insert_pmc_description(
ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_kernel_mode_time>::value, "Kernel Time",
trait::name<category::process_kernel_mode_time>::description, LONG_DESCRIPTION,
COMPONENT, TIME, "ABS", BLOCK, EXPRESSION, 0, 0);
trace_cache::get_metadata_registry().add_pmc_info(
{ agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID,
trait::name<category::process_kernel_mode_time>::value, "Kernel Time",
trait::name<category::process_kernel_mode_time>::description, LONG_DESCRIPTION,
COMPONENT, TIME, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 });
}
void
@@ -211,11 +202,13 @@ rocpd_process_cpu_usage_events(const uint32_t device_id, uint64_t timestamp,
double context_switch, double page_fault, double user_time,
double kernel_time)
{
auto& data_processor = get_data_processor();
auto event_id = data_processor.insert_event(ROCPROFSYS_CATEGORY_CPU_FREQ, 0, 0, 0);
auto& data_processor = get_data_processor();
const auto* _name = trait::name<category::cpu_freq>::value;
auto name_primary_key = data_processor.insert_string(_name);
auto event_id = data_processor.insert_event(name_primary_key, 0, 0, 0);
auto& agent_mngr = agent_manager::get_instance();
auto base_id = agent_mngr.get_agent_by_id(device_id, agent_type::CPU).base_id;
auto base_id = agent_mngr.get_agent_by_type_index(device_id, agent_type::CPU).base_id;
auto insert_event_and_sample = [&](const char* name, double value) {
data_processor.insert_pmc_event(event_id, base_id, name, value);
@@ -258,6 +251,21 @@ setup()
category::process_page_fault, category::process_user_mode_time,
category::process_kernel_mode_time>{});
}
metadata_initialize_cpu_freq_category();
metadata_initialize_cpu_usage_tracks();
metadata_initialize_cpu_freq_tracks();
// `get_enabled_cpus()` returns the number of cores enabled for monitoring but
// the actual device_id is 0, since there is a single device available. And
// the agents seems to be assigned per device basis not per core.
// TODO: `get_enabled_cpus()` should be fixed in the future to align with GPU
// implementation.
auto cpu_agents = agent_manager::get_instance().get_agents_by_type(agent_type::CPU);
for(auto& agent : cpu_agents)
{
metadata_initialize_cpu_freq_pmc(agent->device_id);
}
}
void
@@ -343,25 +351,6 @@ post_process()
auto& enabled_cpus = component::cpu_freq::get_enabled_cpus();
if(get_use_rocpd())
{
rocpd_initialize_cpu_freq_category();
rocpd_initialize_cpu_usage_tracks();
rocpd_initialize_cpu_freq_tracks();
// `get_enabled_cpus()` returns the number of cores enabled for monitoring but the
// actually device_id is 0, since there is a single device available. And the
// agents seems to be assigned per device basis not per core.
// TODO: `get_enabled_cpus()` should be fixed in the future to align with GPU
// implementation.
auto cpu_agents =
agent_manager::get_instance().get_agents_by_type(agent_type::CPU);
for(auto& agent : cpu_agents)
{
rocpd_initialize_cpu_freq_pmc(agent->device_id);
}
}
auto _process_frequencies = [](size_t _idx, size_t _offset) {
using freq_track = perfetto_counter_track<category::cpu_freq>;
+28 -43
Просмотреть файл
@@ -31,10 +31,12 @@
#include "core/defines.hpp"
#include "core/node_info.hpp"
#include "core/perfetto.hpp"
#include "core/rocpd/data_processor.hpp"
#include "core/rocpd/json.hpp"
#include "core/trace_cache/cache_manager.hpp"
#include "core/trace_cache/sample_type.hpp"
#include "library/components/category_region.hpp"
#include "library/runtime.hpp"
#include <optional>
#include <timemory/api/kokkosp.hpp>
#include <timemory/backends/process.hpp>
@@ -157,47 +159,41 @@ violates_name_rules(Arg&& _arg, Args&&... _args)
namespace
{
rocprofsys::rocpd::data_processor&
get_data_processor()
{
return rocprofsys::rocpd::data_processor::get_instance();
}
void
rocpd_initialize_kokkos_category()
metadata_initialize_kokkos_category()
{
get_data_processor().insert_category(
rocprofsys::category_enum_id<category::kokkos>::value,
rocprofsys::trace_cache::get_metadata_registry().add_string(
rocprofsys::trait::name<category::kokkos>::value);
}
void
rocpd_initialize_kokkos_track()
metadata_initialize_kokkos_track()
{
auto& data_processor = get_data_processor();
auto& n_info = rocprofsys::node_info::get_instance();
auto thread_id = std::nullopt;
data_processor.insert_track(rocprofsys::trait::name<category::kokkos>::value,
n_info.id, getpid(), thread_id);
rocprofsys::trace_cache::get_metadata_registry().add_track(
{ rocprofsys::trait::name<category::kokkos>::value, std::nullopt, "{}" });
}
void
rocpd_process_kokkos_event(const char* name, const char* event_type, const char* target,
uint64_t timestamp_ns)
cache_kokkos_event(const char* name, const char* event_type, const char* target,
uint64_t timestamp_ns)
{
auto& data_processor = get_data_processor();
auto event_metadata = rocpd::json::create();
auto event_metadata = rocpd::json::create();
event_metadata->set("name", name);
event_metadata->set("event_type", event_type);
event_metadata->set("target", target);
auto event_id = data_processor.insert_event(
rocprofsys::category_enum_id<category::kokkos>::value, 0, 0, 0, "{}", "{}",
event_metadata->to_string().c_str());
data_processor.insert_sample(rocprofsys::trait::name<category::kokkos>::value,
timestamp_ns, event_id, "{}");
const size_t stack_id = 0;
const size_t parent_stack_id = 0;
const size_t correlation_id = 0;
const char* call_stack = "{}";
const char* line_info = "{}";
rocprofsys::trace_cache::get_buffer_storage().store(
rocprofsys::trace_cache::entry_type::in_time_sample,
rocprofsys::trait::name<category::kokkos>::value, timestamp_ns,
event_metadata->to_string().c_str(), stack_id, parent_stack_id, correlation_id,
call_stack, line_info);
}
} // namespace
@@ -308,11 +304,8 @@ extern "C"
rocprofsys_init_hidden(_mode.c_str(), false, _arg0.c_str());
rocprofsys_push_trace_hidden("kokkos_main");
if(rocprofsys::get_use_rocpd())
{
rocpd_initialize_kokkos_category();
rocpd_initialize_kokkos_track();
}
metadata_initialize_kokkos_category();
metadata_initialize_kokkos_track();
}
setup_kernel_logger();
@@ -619,12 +612,8 @@ extern "C"
kokkosp::profiler_t<kokkosp_region>{ _name }.mark();
}
if(rocprofsys::config::get_use_rocpd())
{
rocpd_process_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(),
"[dual_view_sync]",
(is_device) ? "device" : "host", timestamp);
}
cache_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(), "[dual_view_sync]",
(is_device) ? "device" : "host", timestamp);
}
void kokkosp_dual_view_modify(const char* label, const void* const, bool is_device)
@@ -648,12 +637,8 @@ extern "C"
kokkosp::profiler_t<kokkosp_region>{ _name }.mark();
}
if(rocprofsys::config::get_use_rocpd())
{
rocpd_process_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(),
"[dual_view_modify]",
(is_device) ? "device" : "host", timestamp);
}
cache_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(), "[dual_view_modify]",
(is_device) ? "device" : "host", timestamp);
}
//----------------------------------------------------------------------------------//
+493 -56
Просмотреть файл
@@ -20,24 +20,31 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "library/rocprofiler-sdk.hpp"
#include "core/rocprofiler-sdk.hpp"
#include "api.hpp"
#include "common/synchronized.hpp"
#include "core/common.hpp"
#include "core/config.hpp"
#include "core/containers/stable_vector.hpp"
#include "core/debug.hpp"
#include "core/gpu.hpp"
#include "core/perfetto.hpp"
#include "core/rocprofiler-sdk.hpp"
#include "core/rocpd/json.hpp"
#include "core/state.hpp"
#include "core/trace_cache/buffer_storage.hpp"
#include "core/trace_cache/cache_manager.hpp"
#include "core/trace_cache/metadata_registry.hpp"
#include "core/trace_cache/sample_type.hpp"
#include "library/amd_smi.hpp"
#include "library/components/category_region.hpp"
#include "library/rocprofiler-sdk.hpp"
#include "library/rocprofiler-sdk/counters.hpp"
#include "library/rocprofiler-sdk/fwd.hpp"
#include "library/rocprofiler-sdk/rccl.hpp"
#include "library/thread_info.hpp"
#include "library/tracing.hpp"
#include <algorithm>
#include <timemory/components/timing/wall_clock.hpp>
#include <timemory/hash/types.hpp>
#include <timemory/unwind/processed_entry.hpp>
@@ -247,7 +254,7 @@ create_agent_profile(rocprofiler_agent_id_t agent_id,
ROCPROFSYS_ABORT_F(
"Unable to find all counters for agent %i (gpu-%li, %s) in %s. Found: %s\n",
tool_agent_v->agent->node_id, tool_agent_v->device_id,
tool_agent_v->agent->name, requested_counters.c_str(),
tool_agent_v->agent->name.c_str(), requested_counters.c_str(),
found_counters.c_str());
}
@@ -270,6 +277,12 @@ get_kernel_symbol_info(uint64_t _kernel_id)
return tool_data->get_kernel_symbol_info(_kernel_id);
}
const rocprofiler_callback_tracing_code_object_load_data_t*
get_code_object_info(uint64_t _code_object_id)
{
return tool_data->get_code_object_info(_code_object_id);
}
// Implementation of rocprofiler_callback_tracing_operation_args_cb_t
int
save_args(rocprofiler_callback_tracing_kind_t /*kind*/, int32_t /*operation*/,
@@ -283,6 +296,22 @@ save_args(rocprofiler_callback_tracing_kind_t /*kind*/, int32_t /*operation*/,
return 0;
}
// Additional implementation of rocprofiler_callback_tracing_operation_args_cb_t
// for iterating through arguments in a callback for rocpd_arg table in database
int
iterate_args_callback(rocprofiler_callback_tracing_kind_t /*kind*/, int32_t /*operation*/,
uint32_t arg_number, const void* const /*arg_value_addr*/,
int32_t /*arg_indirection_count*/, const char* arg_type,
const char* arg_name, const char* arg_value_str,
int32_t /*arg_dereference_count*/, void* data)
{
auto* _data = static_cast<function_args_t*>(data);
if(arg_type && arg_name && arg_value_str)
_data->emplace_back(
argument_info{ arg_number, demangle(arg_type), arg_name, arg_value_str });
return 0;
}
auto&
get_marker_pushed_ranges()
{
@@ -299,6 +328,294 @@ get_marker_started_ranges()
return _v;
}
template <typename Tp, typename... Args>
Tp*
as_pointer(Args&&... _args)
{
return new Tp{ std::forward<Args>(_args)... };
}
template <typename... Tp>
void
consume_args(Tp&&...)
{}
auto
get_backtrace(std::optional<std::vector<tim::unwind::processed_entry>>& _bt_data)
{
auto backtrace = ::rocpd::json::create();
if(_bt_data && !_bt_data->empty())
{
const std::string _unk = "??";
size_t _bt_cnt = 0;
for(const auto& itr : *_bt_data)
{
auto _linfo = itr.lineinfo.get();
const auto* _func = (itr.name.empty()) ? &_unk : &itr.name;
const auto* _loc = (_linfo && !_linfo.location.empty())
? &_linfo.location
: ((itr.location.empty()) ? &_unk : &itr.location);
auto _line =
(_linfo && _linfo.line > 0)
? join("", _linfo.line)
: ((itr.lineno == 0) ? std::string{ "?" } : join("", itr.lineno));
auto _entry = join("", demangle(*_func), " @ ",
join(':', ::basename(_loc->c_str()), _line));
backtrace->set(join("", "frame#", _bt_cnt++), _entry);
}
}
return backtrace;
}
template <typename CorrelationIdType>
uint64_t
get_parent_stack_id([[maybe_unused]] const CorrelationIdType& correlation_id)
{
#if(ROCPROFILER_VERSION >= 700)
if constexpr(std::is_same_v<rocprofiler_correlation_id_t, CorrelationIdType>)
{
return correlation_id.ancestor;
}
else
{
return 0;
}
#else
return 0;
#endif
}
auto
get_extdata(const rocprofiler_callback_tracing_record_t& record)
{
constexpr auto message_key = "message";
auto args = callback_arg_array_t{};
auto extdata = ::rocpd::json::create();
rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, 2, &args);
for(auto [key, val] : args)
{
if(key == message_key)
{
extdata->set(key, val);
}
}
return extdata;
}
struct scope_destructor
{
/// \fn scope_destructor(FuncT&& _fini, InitT&& _init)
/// \tparam FuncT "std::function<void()> or void (*)()"
/// \tparam InitT "std::function<void()> or void (*)()"
/// \param _fini Function to execute when object is destroyed
/// \param _init Function to execute when object is created (optional)
///
/// \brief Provides a utility to perform an operation when exiting a scope.
template <typename FuncT, typename InitT = void (*)()>
scope_destructor(FuncT&& _fini, InitT&& _init = []() {});
~scope_destructor() { m_functor(); }
// delete copy operations
scope_destructor(const scope_destructor&) = delete;
scope_destructor& operator=(const scope_destructor&) = delete;
// allow move operations
scope_destructor(scope_destructor&& rhs) noexcept;
scope_destructor& operator=(scope_destructor&& rhs) noexcept;
private:
std::function<void()> m_functor = []() {};
};
template <typename FuncT, typename InitT>
scope_destructor::scope_destructor(FuncT&& _fini, InitT&& _init)
: m_functor{ std::forward<FuncT>(_fini) }
{
_init();
}
inline scope_destructor::scope_destructor(scope_destructor&& rhs) noexcept
: m_functor{ std::move(rhs.m_functor) }
{
rhs.m_functor = []() {};
}
inline scope_destructor&
scope_destructor::operator=(scope_destructor&& rhs) noexcept
{
if(this != &rhs)
{
m_functor = std::move(rhs.m_functor);
rhs.m_functor = []() {};
}
return *this;
}
using kernel_rename_stack_t = std::stack<uint64_t>;
thread_local auto thread_dispatch_rename = as_pointer<kernel_rename_stack_t>();
thread_local auto thread_dispatch_rename_dtor = scope_destructor{ []() {
delete thread_dispatch_rename;
thread_dispatch_rename = nullptr;
} };
template <typename Category>
void
cache_category()
{
trace_cache::get_metadata_registry().add_string(trait::name<Category>::value);
}
void
cache_add_thread_info(uint64_t tid)
{
trace_cache::get_metadata_registry().add_thread_info(
{ getppid(), getpid(), tid, 0, 0, "{}" });
}
void
cache_add_track(const char* track_name, uint64_t tid)
{
trace_cache::get_metadata_registry().add_track({ track_name, tid, "{}" });
}
size_t
get_mem_copy_dst_address(
[[maybe_unused]] const rocprofiler_buffer_tracing_memory_copy_record_t& record)
{
#if(ROCPROFILER_VERSION >= 700)
return record.dst_address.value;
#else
return 0;
#endif
}
size_t
get_mem_copy_src_address(
[[maybe_unused]] const rocprofiler_buffer_tracing_memory_copy_record_t& record)
{
#if(ROCPROFILER_VERSION >= 700)
return record.src_address.value;
#else
return 0;
#endif
}
#if(ROCPROFILER_VERSION >= 600)
size_t
get_mem_alloc_address(
[[maybe_unused]] const rocprofiler_buffer_tracing_memory_allocation_record_t& record)
{
# if(ROCPROFILER_VERSION >= 700)
return record.address.value;
# else
return static_cast<size_t>(record.address.handle);
# endif
}
#endif
// clang-format off
void
cache_region(const rocprofiler_callback_tracing_record_t* record,
const rocprofiler_timestamp_t start_timestamp,
const rocprofiler_timestamp_t end_timestamp, const std::string& call_stack,
const std::string& args_str, const std::string& category)
{
trace_cache::get_buffer_storage().store(
trace_cache::entry_type::region,
record->thread_id,
static_cast<int32_t>(record->kind),
static_cast<int32_t>(record->operation),
record->correlation_id.internal,
get_parent_stack_id(record->correlation_id),
start_timestamp,
end_timestamp,
call_stack.c_str(),
args_str.c_str(),
category.c_str());
}
void
cache_kernel_dispatch(rocprofiler_buffer_tracing_kernel_dispatch_record_t* record)
{
auto stream_handle = get_stream_id(record).handle;
auto queue_handle = record->dispatch_info.queue_id.handle;
trace_cache::get_metadata_registry().add_queue(queue_handle);
trace_cache::get_metadata_registry().add_stream(stream_handle);
trace_cache::get_buffer_storage().store(
trace_cache::entry_type::kernel_dispatch,
record->start_timestamp,
record->end_timestamp,
record->thread_id,
record->dispatch_info.agent_id.handle,
record->dispatch_info.kernel_id,
record->dispatch_info.dispatch_id,
record->dispatch_info.queue_id.handle,
record->correlation_id.internal,
get_parent_stack_id(record->correlation_id),
record->dispatch_info.private_segment_size,
record->dispatch_info.group_segment_size,
record->dispatch_info.workgroup_size.x,
record->dispatch_info.workgroup_size.y,
record->dispatch_info.workgroup_size.z,
record->dispatch_info.grid_size.x,
record->dispatch_info.grid_size.y,
record->dispatch_info.grid_size.z,
stream_handle);
}
void
cache_memory_copy(rocprofiler_buffer_tracing_memory_copy_record_t* record)
{
auto stream_handle = get_stream_id(record).handle;
trace_cache::get_buffer_storage().store(
trace_cache::entry_type::memory_copy,
record->start_timestamp,
record->end_timestamp,
record->thread_id,
record->dst_agent_id.handle,
record->src_agent_id.handle,
static_cast<int32_t>(record->kind),
static_cast<int32_t>(record->operation),
record->bytes,
record->correlation_id.internal,
get_parent_stack_id(record->correlation_id),
get_mem_copy_dst_address(*record),
get_mem_copy_src_address(*record),
stream_handle);
}
#if (ROCPROFILER_VERSION >= 600)
void
cache_memory_allocation(rocprofiler_buffer_tracing_memory_allocation_record_t* record)
{
auto stream_handle = get_stream_id(record).handle;
trace_cache::get_metadata_registry().add_stream(stream_handle);
trace_cache::get_buffer_storage().store(
trace_cache::entry_type::memory_alloc,
record->start_timestamp,
record->end_timestamp,
record->thread_id,
record->agent_id.handle,
static_cast<int32_t>(record->kind),
static_cast<int32_t>(record->operation),
record->allocation_size,
record->correlation_id.internal,
get_parent_stack_id(record->correlation_id),
get_mem_alloc_address(*record),
stream_handle);
}
#endif
// clang-format on
template <typename CategoryT>
void
tool_tracing_callback_start(CategoryT, rocprofiler_callback_tracing_record_t record,
@@ -391,7 +708,8 @@ tool_tracing_callback_stop(
{
ROCPROFSYS_CONDITIONAL_ABORT_F(
get_marker_started_ranges().empty(),
"roctxRangeStop does not have corresponding roctxRangeStart on "
"roctxRangeStop does not have corresponding roctxRangeStart "
"on "
"this thread");
auto _hash = get_marker_started_ranges().back().first;
@@ -473,8 +791,9 @@ tool_tracing_callback_stop(
join(':', ::basename(_loc->c_str()), _line));
if(_bt_cnt < 10)
{
// Prepend zero for better ordering in UI. Only one zero
// is ever necessary since stack depth is limited to 16.
// Prepend zero for better ordering in UI. Only one
// zero is ever necessary since stack depth is limited
// to 16.
tracing::add_perfetto_annotation(
ctx, join("", "frame#0", _bt_cnt++), _entry);
}
@@ -493,6 +812,33 @@ tool_tracing_callback_stop(
tracing::add_perfetto_annotation(ctx, "end_ns", _end_ts);
});
}
// Insert callback trace into database
auto args = function_args_t{};
rocprofiler_iterate_callback_tracing_kind_operation_args(
record, iterate_args_callback, 2, &args);
auto call_stack = get_backtrace(_bt_data);
uint64_t _beg_ts = user_data->value;
uint64_t _end_ts = ts;
{
cache_category<CategoryT>();
cache_add_thread_info(record.thread_id);
std::string args_str;
std::for_each(args.begin(), args.end(), [&args_str](const argument_info& arg) {
const auto* delimiter = ";;";
std::stringstream ss;
ss << arg.arg_number << delimiter << arg.arg_type << delimiter << arg.arg_name
<< delimiter << arg.arg_value << delimiter;
args_str.append(ss.str());
});
cache_region(&record, _beg_ts, _end_ts, call_stack->to_string(), args_str,
trait::name<CategoryT>::value);
}
}
void
@@ -535,6 +881,7 @@ tool_code_object_callback(rocprofiler_callback_tracing_record_t record,
_data.emplace_back(
code_object_callback_record_t{ ts, record, data_v });
});
trace_cache::get_metadata_registry().add_code_object(data_v);
}
else if(record.operation ==
ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER)
@@ -545,6 +892,7 @@ tool_code_object_callback(rocprofiler_callback_tracing_record_t record,
_data.emplace_back(
new kernel_symbol_callback_record_t{ ts, record, data_v });
});
trace_cache::get_metadata_registry().add_kernel_symbol(data_v);
}
}
return;
@@ -635,7 +983,6 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY:
#if(ROCPROFILER_VERSION >= 600)
case ROCPROFILER_CALLBACK_TRACING_OMPT:
case ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION:
case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION:
#endif
#if(ROCPROFILER_VERSION >= 700)
@@ -646,6 +993,12 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
record.kind);
break;
}
default:
{
ROCPROFSYS_CI_ABORT(true, "Unhandled callback record: \n\t%s\n",
info.str().c_str());
break;
}
}
}
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
@@ -657,8 +1010,11 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
constexpr bool bt_with_signal_frame = true;
auto _bt_data = std::optional<backtrace_entry_vec_t>{};
auto use_perfetto =
(config::get_use_perfetto() && config::get_perfetto_annotations());
auto use_rocpd = config::get_use_rocpd();
if(config::get_use_perfetto() && config::get_perfetto_annotations() &&
if((use_perfetto || use_rocpd) &&
tool_data->backtrace_operations.at(record.kind).count(record.operation) > 0)
{
auto _backtrace = tim::get_unw_stack<bt_stack_depth, bt_ignore_depth,
@@ -735,7 +1091,6 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY:
#if(ROCPROFILER_VERSION >= 600)
case ROCPROFILER_CALLBACK_TRACING_OMPT:
case ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION:
case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION:
#endif
#if(ROCPROFILER_VERSION >= 700)
@@ -746,27 +1101,41 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
record.kind);
break;
}
default:
{
ROCPROFSYS_CI_ABORT(true, "Unhandled callback record\n\t%s\n",
info.str().c_str());
break;
}
}
}
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_NONE)
{
if(record.kind == ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH &&
record.operation == ROCPROFILER_KERNEL_DISPATCH_COMPLETE)
switch(record.kind)
{
auto* _data =
static_cast<rocprofiler_callback_tracing_kernel_dispatch_data_t*>(
record.payload);
case ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH:
{
if(record.operation == ROCPROFILER_KERNEL_DISPATCH_COMPLETE)
{
auto* _data =
static_cast<rocprofiler_callback_tracing_kernel_dispatch_data_t*>(
record.payload);
// save for post-processing
get_kernel_dispatch_timestamps().emplace(
_data->dispatch_info.dispatch_id,
timing_interval{ _data->start_timestamp, _data->end_timestamp });
}
else
{
ROCPROFSYS_WARNING_F(
1, "tool_tracing_callback: unhandled PHASE_NONE callback record\n\t%s\n",
info.str().c_str());
// save for post-processing
get_kernel_dispatch_timestamps().emplace(
_data->dispatch_info.dispatch_id,
timing_interval{ _data->start_timestamp, _data->end_timestamp });
}
}
break;
default:
{
ROCPROFSYS_WARNING_F(1,
"tool_tracing_callback: unhandled PHASE_NONE "
"callback record\n\t%s\n",
info.str().c_str());
}
break;
}
}
else
@@ -820,10 +1189,20 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
uint64_t _stream_id = get_stream_id(record).handle;
if(_stream_id == 0)
{
// kernel is not associated with a HIP stream
// kernel_dispatch is not associated with a HIP stream
_group_by_queue = true;
}
{
cache_category<category::rocm_kernel_dispatch>();
cache_add_thread_info(record->thread_id);
cache_add_track(JOIN("", "GPU Kernel Dispatch [", _agent->device_id,
"] Queue ", _queue_id.handle)
.c_str(),
record->thread_id);
cache_kernel_dispatch(record);
}
if(get_use_timemory())
{
const auto& _tinfo = thread_info::get(record->thread_id, SystemTID);
@@ -937,6 +1316,20 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
_group_by_queue = true;
}
{
size_t thread_idx = record->thread_id;
std::string track_name;
track_name =
JOIN("", "GPU Memory Copy to Agent [",
_dst_agent->logical_node_id, "] Thread ", thread_idx);
cache_category<category::rocm_memory_copy>();
cache_add_track(track_name.c_str(), record->thread_id);
cache_memory_copy(record);
}
if(get_use_timemory())
{
const auto& _tinfo = thread_info::get(record->thread_id, SystemTID);
@@ -1005,6 +1398,26 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
}
}
}
#if(ROCPROFILER_VERSION >= 600)
else if(header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION)
{
auto* record =
static_cast<rocprofiler_buffer_tracing_memory_allocation_record_t*>(
header->payload);
{
cache_category<category::rocm_memory_allocate>();
cache_add_thread_info(record->thread_id);
cache_memory_allocation(record);
}
}
#endif
else if(header->kind == ROCPROFILER_BUFFER_TRACING_HSA_CORE_API ||
header->kind == ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API)
{
// Not handling those buffered events
continue;
}
else
{
ROCPROFSYS_THROW(
@@ -1098,10 +1511,10 @@ counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_da
ROCPROFSYS_CONDITIONAL_ABORT_F(
!_agent, "unable to find tool agent for agent (id=%zu)\n",
_agent_id.handle);
ROCPROFSYS_CONDITIONAL_ABORT_F(
!_info,
"unable to find counter info for counter (id=%zu) on agent (id=%zu)\n",
itr.first.handle, _agent_id.handle);
ROCPROFSYS_CONDITIONAL_ABORT_F(!_info,
"unable to find counter info for counter "
"(id=%zu) on agent (id=%zu)\n",
itr.first.handle, _agent_id.handle);
auto _dev_id = static_cast<uint32_t>(_agent->device_id);
@@ -1159,7 +1572,6 @@ void
flush()
{
if(!tool_data) return;
for(auto itr : tool_data->get_buffers())
{
if(itr.handle > 0)
@@ -1268,6 +1680,24 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
_data->primary_ctx, ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, nullptr, 0,
tool_code_object_callback, _data));
auto external_corr_id_request_kinds =
std::array<rocprofiler_external_correlation_id_request_kind_t, 3>{
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH,
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY,
#if(ROCPROFILER_VERSION >= 600)
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_ALLOCATION
#endif
};
// Insert the default stream and queue info to ensure that the default entry is
{
trace_cache::get_metadata_registry().add_stream(0);
trace_cache::get_metadata_registry().add_queue(0);
}
// ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
// _data->primary_ctx, ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, nullptr, 0,
// tool_code_object_callback, _data));
for(auto itr : {
ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API,
ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API,
@@ -1296,17 +1726,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
}
}
constexpr auto buffer_size = 8192;
constexpr auto watermark = 7936;
// Configure external correlation id request service for kernel dispatch
// and memory copy.
auto external_corr_id_request_kinds =
std::array<rocprofiler_external_correlation_id_request_kind_t, 2>{
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH,
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY
};
constexpr auto buffer_size = 16 * 4096;
constexpr auto watermark = 15 * 4096;
ROCPROFILER_CALL(rocprofiler_configure_external_correlation_id_request_service(
_data->primary_ctx, external_corr_id_request_kinds.data(),
@@ -1334,7 +1755,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
_data->primary_ctx, ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, nullptr, 0,
_data->kernel_dispatch_buffer));
}
// ROCPROFILER_BUFFER_TRACING_HSA_CORE_API, ///< @see
// ::rocprofiler_hsa_core_api_id_t ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API,
if(_buffered_domain.count(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY) > 0)
{
ROCPROFILER_CALL(rocprofiler_create_buffer(
@@ -1347,19 +1769,39 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
_data->memory_copy_buffer));
}
#if(ROCPROFILER_VERSION >= 600)
if(_buffered_domain.count(ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION) > 0)
{
ROCPROFILER_CALL(rocprofiler_create_buffer(
_data->primary_ctx, buffer_size, watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS, tool_tracing_buffered, tool_data,
&_data->memory_alloc_buffer));
if(_data->memory_alloc_buffer.handle == 0UL)
{
ROCPROFSYS_CI_ABORT(true, "Failed to create memory allocation buffer\n");
}
auto _ops =
rocprofiler_sdk::get_operations(ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION);
ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service(
_data->primary_ctx, ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION, nullptr, 0,
_data->memory_alloc_buffer));
}
#endif
if(!_counter_events.empty())
{
for(const auto& itr : _data->gpu_agents)
{
const auto& _agent_id = rocprofiler_agent_id_t{ itr.agent->handle };
_data->agent_events.emplace(
itr.agent->id,
create_agent_profile(itr.agent->id, _counter_events, _data));
_agent_id, create_agent_profile(_agent_id, _counter_events, _data));
}
ROCPROFILER_CALL(rocprofiler_create_context(&_data->counter_ctx));
auto _operations = std::array<rocprofiler_tracing_operation_t, 1>{
ROCPROFILER_KERNEL_DISPATCH_COMPLETE
ROCPROFILER_KERNEL_DISPATCH_COMPLETE,
};
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
@@ -1393,8 +1835,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
}
}
// throwaway context for handling the profiler control API. If primary_ctx were used,
// we would get profiler pause callback but never get profiler resume callback
// throwaway context for handling the profiler control API. If primary_ctx were
// used, we would get profiler pause callback but never get profiler resume
// callback
{
auto _local_ctx = rocprofiler_context_id_t{ 0 };
ROCPROFILER_CALL(rocprofiler_create_context(&_local_ctx));
@@ -1405,8 +1848,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data)
if(!is_valid(_data->primary_ctx))
{
// notify rocprofiler that initialization failed and all the contexts, buffers,
// etc. created should be ignored
// notify rocprofiler that initialization failed and all the contexts,
// buffers, etc. created should be ignored
return -1;
}
@@ -1454,13 +1897,7 @@ tool_fini(void* callback_data)
void
setup()
{
if(int status = 0;
rocprofiler_is_initialized(&status) == ROCPROFILER_STATUS_SUCCESS && status == 0)
{
ROCPROFILER_CALL(rocprofiler_force_configure(&rocprofiler_configure));
}
}
{}
void
shutdown()
+27 -49
Просмотреть файл
@@ -91,15 +91,17 @@ get_agent_counter_info(const tool_agent_vec_t& _agents)
for(auto itr : _agents)
{
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
itr.agent->id, counters_supported_callback, &_data));
const auto& _agent_id = rocprofiler_agent_id_t{ itr.agent->handle };
std::sort(_data.at(itr.agent->id).begin(), _data.at(itr.agent->id).end(),
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
_agent_id, counters_supported_callback, &_data));
std::sort(_data.at(_agent_id).begin(), _data.at(_agent_id).end(),
[](const auto& lhs, const auto& rhs) {
return (lhs.id.handle < rhs.id.handle);
});
for(auto& citr : _data.at(itr.agent->id))
for(auto& citr : _data.at(_agent_id))
{
std::sort(citr.dimension_info.begin(), citr.dimension_info.end(),
[](const auto& lhs, const auto& rhs) { return (lhs.id < rhs.id); });
@@ -123,38 +125,16 @@ client_data::initialize()
buffered_tracing_info = rocprofiler::sdk::get_buffer_tracing_names();
callback_tracing_info = rocprofiler::sdk::get_callback_tracing_names();
static constexpr auto supported_agent_info_version = ROCPROFILER_AGENT_INFO_VERSION_0;
rocprofiler_query_available_agents_cb_t iterate_cb =
[](rocprofiler_agent_version_t version, const void** agents_arr,
size_t num_agents, void* user_data) {
ROCPROFSYS_CONDITIONAL_ABORT(version != supported_agent_info_version,
"rocprofiler agent info version != expected "
"agent info version (=%i). value: %i\n",
supported_agent_info_version, version);
auto _agents_v = std::vector<rocprofiler_agent_v0_t>{};
for(size_t i = 0; i < num_agents; ++i)
{
const auto* _agent =
static_cast<const rocprofiler_agent_v0_t*>(agents_arr[i]);
_agents_v.emplace_back(*_agent);
}
auto* tool_data_v = as_client_data(user_data);
tool_data_v->set_agents(std::move(_agents_v));
return ROCPROFILER_STATUS_SUCCESS;
};
ROCPROFILER_CALL(rocprofiler_query_available_agents(
supported_agent_info_version, iterate_cb, sizeof(rocprofiler_agent_t), this));
set_agents();
}
void
client_data::initialize_event_info()
{
if(agents.empty()) initialize();
if(agent_manager::get_instance().get_agents().empty())
{
initialize();
}
if(agent_counter_info.size() != gpu_agents.size())
agent_counter_info = get_agent_counter_info(gpu_agents);
@@ -166,14 +146,15 @@ client_data::initialize_event_info()
for(const auto& aitr : gpu_agents)
{
auto _dev_index = aitr.device_id;
auto _device_qualifier_sym = JOIN("", ":device=", _dev_index);
auto _device_qualifier =
auto _dev_index = aitr.device_id;
const auto& _agent_id = rocprofiler_agent_id_t{ aitr.agent->handle };
auto _device_qualifier_sym = JOIN("", ":device=", _dev_index);
auto _device_qualifier =
tim::hardware_counters::qualifier{ true, static_cast<int>(_dev_index),
_device_qualifier_sym,
JOIN(" ", "Device", _dev_index) };
auto _counter_info = agent_counter_info.at(aitr.agent->id);
auto _counter_info = agent_counter_info.at(_agent_id);
std::sort(_counter_info.begin(), _counter_info.end(),
[](const rocprofiler_tool_counter_info_t& lhs,
const rocprofiler_tool_counter_info_t& rhs) {
@@ -248,23 +229,20 @@ client_data::initialize_event_info()
}
void
client_data::set_agents(agent_vec_t&& _agents_v)
client_data::set_agents()
{
agents = std::move(_agents_v);
auto& agent_mngr = agent_manager::get_instance();
std::sort(agents.begin(), agents.end(),
[](const auto& lhs, const auto& rhs) { return lhs.node_id < rhs.node_id; });
auto fill_agents = [&](agent_type type, std::vector<tool_agent>& out) {
const auto& _agents = agent_mngr.get_agents_by_type(type);
for(const auto& agent : _agents)
{
out.emplace_back(tool_agent{ agent->device_type_index, agent.get() });
}
};
cpu_agents.clear();
gpu_agents.clear();
for(const auto& itr : agents)
{
if(itr.type == ROCPROFILER_AGENT_TYPE_CPU)
cpu_agents.emplace_back(tool_agent{ cpu_agents.size(), &itr });
else if(itr.type == ROCPROFILER_AGENT_TYPE_GPU)
gpu_agents.emplace_back(tool_agent{ gpu_agents.size(), &itr });
}
fill_agents(agent_type::GPU, gpu_agents);
fill_agents(agent_type::CPU, cpu_agents);
}
} // namespace rocprofiler_sdk
} // namespace rocprofsys
+45 -11
Просмотреть файл
@@ -23,6 +23,7 @@
#pragma once
#include "common/synchronized.hpp"
#include "core/agent_manager.hpp"
#include "core/timemory.hpp"
#include <rocprofiler-sdk/agent.h>
@@ -50,6 +51,8 @@ using kernel_symbol_map_t =
std::unordered_map<rocprofiler_kernel_id_t, kernel_symbol_data_t>;
using callback_arg_array_t = std::vector<std::pair<std::string, std::string>>;
using rocprofsys_agent_t = agent;
struct code_object_callback_record_t
{
uint64_t timestamp = 0;
@@ -88,8 +91,8 @@ struct rocprofiler_tool_counter_info_t : rocprofiler_counter_info_v0_t
struct tool_agent
{
uint64_t device_id = 0;
const rocprofiler_agent_v0_t* agent = nullptr;
uint64_t device_id = 0;
const rocprofsys_agent_t* agent = nullptr;
};
struct timing_interval
@@ -98,6 +101,16 @@ struct timing_interval
rocprofiler_timestamp_t end = 0;
};
struct argument_info
{
uint32_t arg_number = 0;
std::string arg_type = {};
std::string arg_name = {};
std::string arg_value = {};
};
using function_args_t = std::vector<argument_info>;
using agent_counter_info_map_t =
std::unordered_map<rocprofiler_agent_id_t,
std::vector<rocprofiler_tool_counter_info_t>>;
@@ -117,7 +130,7 @@ using backtrace_operation_map_t =
struct client_data
{
static constexpr size_t num_buffers = 3;
static constexpr size_t num_buffers = 4;
static constexpr size_t num_contexts = 2;
using buffer_name_info_t = rocprofiler::sdk::buffer_name_info_t<std::string_view>;
@@ -134,8 +147,8 @@ struct client_data
rocprofiler_context_id_t counter_ctx = { 0 };
rocprofiler_buffer_id_t kernel_dispatch_buffer = { 0 };
rocprofiler_buffer_id_t memory_copy_buffer = { 0 };
rocprofiler_buffer_id_t memory_alloc_buffer = { 0 };
rocprofiler_buffer_id_t counter_collection_buffer = { 0 };
std::vector<rocprofiler_agent_v0_t> agents = {};
std::vector<tool_agent> cpu_agents = {};
std::vector<tool_agent> gpu_agents = {};
std::vector<hardware_counter_info> events_info = {};
@@ -150,14 +163,16 @@ struct client_data
void initialize();
void initialize_event_info();
void set_agents(agent_vec_t&& agents);
void set_agents();
context_id_vec_t get_contexts() const;
buffer_id_vec_t get_buffers() const;
const rocprofiler_agent_t* get_agent(rocprofiler_agent_id_t _id) const;
const rocprofsys_agent_t* get_agent(rocprofiler_agent_id_t _id) const;
const tool_agent* get_gpu_tool_agent(rocprofiler_agent_id_t id) const;
const kernel_symbol_data_t* get_kernel_symbol_info(uint64_t _kernel_id) const;
const rocprofiler_tool_counter_info_t* get_tool_counter_info(
rocprofiler_agent_id_t _agent_id, rocprofiler_counter_id_t _counter_id) const;
const rocprofiler_callback_tracing_code_object_load_data_t* get_code_object_info(
uint64_t code_object_id) const;
};
inline client_data::context_id_vec_t
@@ -175,23 +190,24 @@ client_data::get_buffers() const
return buffer_id_vec_t{
kernel_dispatch_buffer,
memory_copy_buffer,
memory_alloc_buffer,
counter_collection_buffer,
};
}
inline const rocprofiler_agent_t*
inline const rocprofsys_agent_t*
client_data::get_agent(rocprofiler_agent_id_t _id) const
{
for(const auto& itr : agents)
if(itr.id == _id) return &itr;
return nullptr;
const auto& agent = agent_manager::get_instance().get_agent_by_handle(_id.handle);
return &agent;
}
inline const tool_agent*
client_data::get_gpu_tool_agent(rocprofiler_agent_id_t id) const
{
for(const auto& itr : gpu_agents)
if(id == itr.agent->id) return &itr;
if(id.handle == itr.agent->handle) return &itr;
return nullptr;
}
@@ -223,6 +239,24 @@ client_data::get_tool_counter_info(rocprofiler_agent_id_t _agent_id,
return nullptr;
}
inline const rocprofiler_callback_tracing_code_object_load_data_t*
client_data::get_code_object_info(uint64_t code_object_id) const
{
return code_object_records.rlock(
[code_object_id](const auto& _data)
-> const rocprofiler_callback_tracing_code_object_load_data_t* {
for(const auto& itr : _data)
{
if(code_object_id == itr.payload.code_object_id)
{
return &itr.payload;
break;
}
}
return nullptr;
});
}
inline constexpr client_data*
as_client_data(void* _ptr)
{
+86 -50
Просмотреть файл
@@ -31,13 +31,13 @@
#include "core/rocpd/data_processor.hpp"
#include "core/rocpd/json.hpp"
#include "core/state.hpp"
#include "core/trace_cache/cache_manager.hpp"
#include "core/utility.hpp"
#include "library/components/backtrace.hpp"
#include "library/components/backtrace_metrics.hpp"
#include "library/components/backtrace_timestamp.hpp"
#include "library/components/callchain.hpp"
#include "library/perf.hpp"
#include "library/ptl.hpp"
#include "library/runtime.hpp"
#include "library/thread_data.hpp"
#include "library/thread_info.hpp"
@@ -59,6 +59,7 @@
#include <timemory/mpl/quirks.hpp>
#include <timemory/mpl/type_traits.hpp>
#include <timemory/operations.hpp>
#include <timemory/process/threading.hpp>
#include <timemory/sampling/allocator.hpp>
#include <timemory/sampling/overflow.hpp>
#include <timemory/sampling/sampler.hpp>
@@ -83,6 +84,7 @@
#include <regex>
#include <sstream>
#include <string>
#include <string_view>
#include <type_traits>
#include <pthread.h>
@@ -227,46 +229,65 @@ get_data_processor()
return rocpd::data_processor::get_instance();
}
template <typename Category>
std::string
get_track_name(const thread_info& _thread_info)
{
size_t thread_id = _thread_info.index_data->system_value;
size_t sequent_value = _thread_info.index_data->sequent_value;
constexpr auto sample_type =
std::is_same_v<Category, category::timer_sampling> ? "Timer" : "Overflow";
return JOIN(" ", "Thread", sequent_value, sample_type, "(S)", thread_id);
}
void
rocpd_initialize_sampling_category()
{
static bool _is_initialized = false;
if(_is_initialized) return;
auto& data_processor = get_data_processor();
data_processor.insert_category(ROCPROFSYS_CATEGORY_SAMPLING,
trait::name<category::sampling>::value);
data_processor.insert_category(ROCPROFSYS_CATEGORY_OVERFLOW_SAMPLING,
trait::name<category::overflow_sampling>::value);
data_processor.insert_category(ROCPROFSYS_CATEGORY_TIMER_SAMPLING,
trait::name<category::timer_sampling>::value);
trace_cache::get_metadata_registry().add_string(
trait::name<category::sampling>::value);
trace_cache::get_metadata_registry().add_string(
trait::name<category::overflow_sampling>::value);
trace_cache::get_metadata_registry().add_string(
trait::name<category::timer_sampling>::value);
_is_initialized = true;
}
size_t
void
rocpd_initialize_thread_info(size_t tid)
{
const auto& _thread_info = thread_info::get(tid, SequentTID);
ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", tid);
if(!_thread_info) return -1;
if(!_thread_info) return;
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
return data_processor.insert_thread_info(
n_info.id, getppid(), getpid(), _thread_info->index_data->system_value,
threading::get_thread_name().c_str(), _thread_info->get_start(),
_thread_info->get_stop(), "{}");
trace_cache::get_metadata_registry().add_thread_info(
{ getppid(), getpid(),
static_cast<size_t>(_thread_info->index_data->system_value),
static_cast<uint32_t>(_thread_info->get_start()),
static_cast<uint32_t>(_thread_info->get_stop()), "{}" });
}
void
rocpd_init_track(const char* track_name, int64_t tid)
rocpd_init_track(int64_t tid)
{
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
const auto& _thread_info = thread_info::get(tid, SequentTID);
ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", tid);
if(!_thread_info) return;
data_processor.insert_track(track_name, n_info.id, getpid(), tid, "{}");
size_t thread_id = _thread_info->index_data->system_value;
const auto& _timer_track_name =
get_track_name<category::timer_sampling>(*_thread_info);
const auto& _overflow_track_name =
get_track_name<category::overflow_sampling>(*_thread_info);
trace_cache::get_metadata_registry().add_track(
{ _timer_track_name, thread_id, "{}" });
trace_cache::get_metadata_registry().add_track(
{ _overflow_track_name, thread_id, "{}" });
}
template <typename Category>
@@ -275,11 +296,12 @@ rocpd_insert_region(size_t thread_id, size_t start_time, size_t end_time, size_t
const char* track, const char* call_stack = "{}",
const char* line_info = "{}", const char* extdata = "{}")
{
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
auto& data_processor = get_data_processor();
auto& n_info = node_info::get_instance();
auto string_primary_key = data_processor.insert_string(trait::name<Category>::value);
auto event_id = data_processor.insert_event(category_enum_id<Category>::value, 0, 0,
0, call_stack, line_info, extdata);
auto event_id = data_processor.insert_event(string_primary_key, 0, 0, 0, call_stack,
line_info, extdata);
data_processor.insert_region(n_info.id, getpid(), thread_id, start_time, end_time,
name_id, event_id);
@@ -849,6 +871,9 @@ configure(bool _setup, int64_t _tid)
}
}
}
rocpd_initialize_sampling_category();
rocpd_initialize_thread_info(_tid);
rocpd_init_track(_tid);
*_running = true;
sampling::get_sampler_init(_tid)->sample();
@@ -1403,7 +1428,7 @@ post_process_perfetto(int64_t _tid, const std::vector<timer_sampling_data>& _tim
{
auto _ncur = _ncount++;
// the begin/end + HW counters will be same for entire call-stack so only
// annotate the top and the bottom functons to keep the data consumption
// annotate the top and the bottom functions to keep the data consumption
// low
bool _include_common = (_ncur == 0 || _ncur + 1 == itr.m_stack.size());
@@ -1707,14 +1732,14 @@ rocpd_post_process_overflow_data(
.first->c_str();
auto main_name_id = data_processor.insert_string(_main_name);
const auto& _track_name =
JOIN(" ", "Thread", _thread_info->index_data->sequent_value, "Overflow",
"(S)", _thread_info->index_data->system_value);
size_t thread_id = _thread_info->index_data->system_value;
auto thread_primary_key = data_processor.map_thread_id_to_primary_key(thread_id);
const auto _track_name =
get_track_name<category::overflow_sampling>(*_thread_info);
auto thread_idx = rocpd_initialize_thread_info(_tid);
rocpd_init_track(_track_name.c_str(), thread_idx);
rocpd_insert_region<category::overflow_sampling>(
thread_idx, _beg_ns, _end_ns, main_name_id, _track_name.c_str());
thread_primary_key, _beg_ns, _end_ns, main_name_id, _track_name.c_str());
for(const auto& itr : _overflow_data)
{
@@ -1729,7 +1754,7 @@ rocpd_post_process_overflow_data(
static_strings.emplace(demangle(iitr.name)).first->c_str();
auto name_id = data_processor.insert_string(_name);
rocpd_insert_region<category::overflow_sampling>(
thread_idx, _beg, _end, name_id, _track_name.c_str(),
thread_primary_key, _beg, _end, name_id, _track_name.c_str(),
generate_call_stack_json(iitr).c_str(),
generate_line_info_json(iitr).c_str());
}
@@ -1738,9 +1763,11 @@ rocpd_post_process_overflow_data(
}
void
rocpd_post_process_backtrace_metrics(int64_t _tid,
const std::vector<timer_sampling_data>& _timer_data)
rocpd_post_process_backtrace_metrics(
[[maybe_unused]] int64_t _tid,
[[maybe_unused]] const std::vector<timer_sampling_data>& _timer_data)
{
#if ROCPROFSYS_USE_ROCM > 0
auto _valid_metrics = backtrace_metrics::valid_array_t{};
for(const auto& itr : _timer_data)
@@ -1752,17 +1779,20 @@ rocpd_post_process_backtrace_metrics(int64_t _ti
{
ROCPROFSYS_VERBOSE(3 || get_debug_sampling(),
"[%li] Post-processing metrics for rocpd...\n", _tid);
backtrace_metrics::init_rocpd(_tid, _valid_metrics);
backtrace_metrics::init_rocpd(_tid, _valid_metrics); // move to setup
for(const auto& itr : _timer_data)
itr.m_metrics.post_process_rocpd(_tid, 0.5 * (itr.m_beg + itr.m_end));
backtrace_metrics::fini_rocpd(_tid, _valid_metrics);
}
#endif
}
void
rocpd_post_process_timer_data(int64_t _tid,
const std::vector<timer_sampling_data>& _timer_data)
rocpd_post_process_timer_data(
[[maybe_unused]] int64_t _tid,
[[maybe_unused]] const std::vector<timer_sampling_data>& _timer_data)
{
#if ROCPROFSYS_USE_ROCM > 0
auto& data_processor = get_data_processor();
const auto& _thread_info = thread_info::get(_tid, SequentTID);
@@ -1776,15 +1806,14 @@ rocpd_post_process_timer_data(int64_t _tid,
auto _beg_ns = std::max(_timer_data.front().m_beg, _thread_info->get_start());
auto _end_ns = std::min(_timer_data.back().m_end, _thread_info->get_stop());
const auto _track_name =
JOIN(" ", "Thread", _thread_info->index_data->sequent_value, "(S)",
_thread_info->index_data->system_value);
const auto _track_name = get_track_name<category::timer_sampling>(*_thread_info);
auto thread_primary_key = data_processor.map_thread_id_to_primary_key(
_thread_info->index_data->system_value);
auto thread_idx = rocpd_initialize_thread_info(_tid);
rocpd_init_track(_track_name.c_str(), thread_idx);
const auto main_name_id = data_processor.insert_string("samples [rocprof-sys]");
rocpd_insert_region<category::timer_sampling>(thread_idx, _beg_ns, _end_ns,
main_name_id, _track_name.c_str());
rocpd_insert_region<category::timer_sampling>(
thread_primary_key, _beg_ns, _end_ns, main_name_id, _track_name.c_str());
auto _labels = backtrace_metrics::get_hw_counter_labels(_tid);
for(const auto& itr : _timer_data)
@@ -1798,7 +1827,7 @@ rocpd_post_process_timer_data(int64_t _tid,
{
auto _ncur = _ncount++;
// the begin/end + HW counters will be same for entire call-stack so only
// annotate the top and the bottom functons to keep the data consumption
// annotate the top and the bottom functions to keep the data consumption
// low
bool _include_common = (_ncur == 0 || _ncur + 1 == itr.m_stack.size());
@@ -1832,8 +1861,9 @@ rocpd_post_process_timer_data(int64_t _tid,
inlined_call_stack->set("inlined", "true");
rocpd_insert_region<category::timer_sampling>(
thread_idx, _beg, _end, inlined_name_id, _track_name.c_str(),
inlined_call_stack->to_string().c_str(), "{}",
thread_primary_key, _beg, _end, inlined_name_id,
_track_name.c_str(), inlined_call_stack->to_string().c_str(),
"{}",
// Only include HW counters for first inlined function
(_n == 0) ? hw_counter_json.c_str() : "{}");
}
@@ -1843,22 +1873,28 @@ rocpd_post_process_timer_data(int64_t _tid,
const auto* _name = static_strings.emplace(iitr.name).first->c_str();
const auto name_id = data_processor.insert_string(_name);
rocpd_insert_region<category::timer_sampling>(
thread_idx, _beg, _end, name_id, _track_name.c_str(),
thread_primary_key, _beg, _end, name_id, _track_name.c_str(),
generate_call_stack_json(iitr).c_str(),
generate_line_info_json(iitr).c_str(), hw_counter_json.c_str());
}
}
}
}
#endif
}
void
post_process_rocpd(int64_t _tid, const std::vector<timer_sampling_data>& _timer_data,
const std::vector<overflow_sampling_data>& _overflow_data)
{
rocpd_initialize_sampling_category();
#if ROCPROFSYS_USE_ROCM > 0
rocpd_post_process_overflow_data(_tid, _overflow_data);
rocpd_post_process_timer_data(_tid, _timer_data);
#else
(void) _tid;
(void) _timer_data;
(void) _overflow_data;
#endif
}
struct sampling_initialization