From 1f86010ca2aa5e640276f0eeee33cfaa8317c355 Mon Sep 17 00:00:00 2001 From: "systems-assistant[bot]" <221163467+systems-assistant[bot]@users.noreply.github.com> Date: Tue, 19 Aug 2025 22:01:04 -0400 Subject: [PATCH] ROCpd support [Part 2] (#109) * Rocpd part 2, caching * Fix shadowed variables * backward compatibility * Fixed designated initializers * Fix timemory include * Remove benchmark & Fix build issues for rhel * Add missing bracket * Fix shadowing and pedantic * Fix pedantic pt2 * Fix duplicated SDK calls * Add decay in get_size_impl * Rename sample cache to trace cache * Add cache storage supported types * Resolving track naming in sampling module * fix sampling of flushing thread * fix sampling of flushing thread 2 * throw exception upon store while buffer storage is not running * Prevent fork crashing * Fix rebase issue * Applied suggestions from code review * Change flushing thread to use PTL * Fix agent creation order * Fix stream id ci throw * Remove force setup of rocprofiler-sdk * Code cleanup * Change initialization for agent * Add missing namespace * Fix the mismatch within the tool_agent->device_id * Switch from using handle to use agent type index * Fix pmc info comparator in metadata registry --------- Co-authored-by: Aleksandar Co-authored-by: Milan Radosavljevic Co-authored-by: Marjan Antic --- projects/rocprofiler-systems/CMakeLists.txt | 7 - .../source/lib/core/CMakeLists.txt | 29 + .../source/lib/core/agent.hpp | 5 +- .../source/lib/core/agent_manager.cpp | 25 +- .../source/lib/core/agent_manager.hpp | 3 +- .../source/lib/core/benchmark/benchmark.hpp | 353 ----------- .../source/lib/core/benchmark/category.hpp | 68 --- .../source/lib/core/categories.hpp | 2 + .../source/lib/core/cpu.cpp | 23 + .../source/lib/core/gpu.cpp | 25 +- .../source/lib/core/rocpd/CMakeLists.txt | 22 + .../source/lib/core/rocpd/data_processor.cpp | 99 +-- .../source/lib/core/rocpd/data_processor.hpp | 23 +- .../lib/core/rocpd/data_storage/database.cpp | 7 +- .../source/lib/core/rocprofiler-sdk.cpp | 13 + .../lib/core/trace_cache/CMakeLists.txt | 44 ++ .../lib/core/trace_cache/buffer_storage.cpp | 178 ++++++ .../lib/core/trace_cache/buffer_storage.hpp | 162 +++++ .../lib/core/trace_cache/cache_manager.cpp | 79 +++ .../lib/core/trace_cache/cache_manager.hpp | 67 ++ .../lib/core/trace_cache/cache_utility.hpp | 45 ++ .../core/trace_cache/metadata_registry.cpp | 296 +++++++++ .../core/trace_cache/metadata_registry.hpp | 218 +++++++ .../trace_cache/rocpd_post_processing.cpp | 574 ++++++++++++++++++ .../trace_cache/rocpd_post_processing.hpp | 60 ++ .../lib/core/trace_cache/sample_type.hpp | 198 ++++++ .../lib/core/trace_cache/storage_parser.cpp | 230 +++++++ .../lib/core/trace_cache/storage_parser.hpp | 83 +++ .../source/lib/rocprof-sys-dl/dl.cpp | 16 +- .../rocprofiler-systems/categories.h | 1 + .../source/lib/rocprof-sys/library.cpp | 45 +- .../lib/rocprof-sys/library/amd_smi.cpp | 105 ++-- .../library/components/backtrace_metrics.cpp | 280 +++++---- .../library/components/comm_data.cpp | 158 ++--- .../lib/rocprof-sys/library/cpu_freq.cpp | 185 +++--- .../lib/rocprof-sys/library/kokkosp.cpp | 71 +-- .../rocprof-sys/library/rocprofiler-sdk.cpp | 549 +++++++++++++++-- .../library/rocprofiler-sdk/fwd.cpp | 76 +-- .../library/rocprofiler-sdk/fwd.hpp | 56 +- .../lib/rocprof-sys/library/sampling.cpp | 136 +++-- 40 files changed, 3432 insertions(+), 1184 deletions(-) delete mode 100644 projects/rocprofiler-systems/source/lib/core/benchmark/benchmark.hpp delete mode 100644 projects/rocprofiler-systems/source/lib/core/benchmark/category.hpp create mode 100644 projects/rocprofiler-systems/source/lib/core/trace_cache/CMakeLists.txt create mode 100644 projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.cpp create mode 100644 projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.hpp create mode 100644 projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp create mode 100644 projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.hpp create mode 100644 projects/rocprofiler-systems/source/lib/core/trace_cache/cache_utility.hpp create mode 100644 projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.cpp create mode 100644 projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.hpp create mode 100644 projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.cpp create mode 100644 projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.hpp create mode 100644 projects/rocprofiler-systems/source/lib/core/trace_cache/sample_type.hpp create mode 100644 projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.cpp create mode 100644 projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.hpp diff --git a/projects/rocprofiler-systems/CMakeLists.txt b/projects/rocprofiler-systems/CMakeLists.txt index 605b070f0d..123e04ebc1 100644 --- a/projects/rocprofiler-systems/CMakeLists.txt +++ b/projects/rocprofiler-systems/CMakeLists.txt @@ -224,9 +224,6 @@ rocprofiler_systems_add_option(ROCPROFSYS_INSTALL_PERFETTO_TOOLS rocprofiler_systems_add_option(ROCPROFILER_BUILD_SQLITE3 "Enable building sqlite3 library internally" OFF ) -rocprofiler_systems_add_option(ROCPROFSYS_ENABLE_BENCHMARK - "Enable performance benchmarking capabilities for the project" OFF -) if(ROCPROFSYS_USE_PAPI) rocprofiler_systems_add_option(ROCPROFSYS_BUILD_PAPI "Build PAPI from submodule" ON) @@ -334,10 +331,6 @@ if(ROCPROFSYS_BUILD_TESTING OR "$ENV{ROCPROFSYS_CI}" MATCHES "[1-9]+|ON|on|y|yes include(CTest) endif() -if(ROCPROFSYS_ENABLE_BENCHMARK) - add_compile_definitions(-DROCPROFSYS_USE_BENCHMARK=1) -endif() - # ------------------------------------------------------------------------------# # # library and executables diff --git a/projects/rocprofiler-systems/source/lib/core/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/core/CMakeLists.txt index 693002a72d..51eb738b79 100644 --- a/projects/rocprofiler-systems/source/lib/core/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/lib/core/CMakeLists.txt @@ -1,3 +1,25 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + # configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/defines.hpp.in @@ -25,6 +47,8 @@ set(core_sources ${CMAKE_CURRENT_LIST_DIR}/state.cpp ${CMAKE_CURRENT_LIST_DIR}/timemory.cpp ${CMAKE_CURRENT_LIST_DIR}/utility.cpp + ${CMAKE_CURRENT_LIST_DIR}/agent_manager.cpp + ${CMAKE_CURRENT_LIST_DIR}/node_info.cpp ) set(core_headers @@ -53,6 +77,9 @@ set(core_headers ${CMAKE_CURRENT_LIST_DIR}/state.hpp ${CMAKE_CURRENT_LIST_DIR}/timemory.hpp ${CMAKE_CURRENT_LIST_DIR}/utility.hpp + ${CMAKE_CURRENT_LIST_DIR}/agent.hpp + ${CMAKE_CURRENT_LIST_DIR}/agent_manager.hpp + ${CMAKE_CURRENT_LIST_DIR}/node_info.hpp ) add_library(rocprofiler-systems-core-library STATIC) @@ -69,6 +96,7 @@ add_subdirectory(binary) add_subdirectory(components) add_subdirectory(containers) add_subdirectory(rocpd) +add_subdirectory(trace_cache) target_include_directories( rocprofiler-systems-core-library @@ -94,6 +122,7 @@ target_link_libraries( $ $ $ + $ $ $ $ diff --git a/projects/rocprofiler-systems/source/lib/core/agent.hpp b/projects/rocprofiler-systems/source/lib/core/agent.hpp index f942e301e5..6e8babd41b 100644 --- a/projects/rocprofiler-systems/source/lib/core/agent.hpp +++ b/projects/rocprofiler-systems/source/lib/core/agent.hpp @@ -43,7 +43,8 @@ enum class agent_type : uint8_t struct agent { agent_type type; - uint64_t id; + uint64_t handle; + uint64_t device_id; uint32_t node_id; int32_t logical_node_id; int32_t logical_node_type_id; @@ -52,7 +53,7 @@ struct agent std::string vendor_name; std::string product_name; - size_t device_id{ 0 }; + size_t device_type_index{ 0 }; size_t base_id{ 0 }; #if ROCPROFSYS_USE_ROCM > 0 amdsmi_processor_handle smi_handle = nullptr; diff --git a/projects/rocprofiler-systems/source/lib/core/agent_manager.cpp b/projects/rocprofiler-systems/source/lib/core/agent_manager.cpp index b7086931ee..cfe11b512e 100644 --- a/projects/rocprofiler-systems/source/lib/core/agent_manager.cpp +++ b/projects/rocprofiler-systems/source/lib/core/agent_manager.cpp @@ -44,11 +44,30 @@ agent_manager::insert_agent(agent& _agent) (_agent.type == agent_type::GPU ? _gpu_agents_cnt : _cpu_agents_cnt), (_agent.type == agent_type::GPU ? "GPU" : "CPU")); - _agent.device_id = + _agent.device_type_index = (_agent.type == agent_type::GPU ? _gpu_agents_cnt++ : _cpu_agents_cnt++); _agents.emplace_back(std::make_shared(_agent)); } +const agent& +agent_manager::get_agent_by_type_index(size_t type_index, agent_type type) const +{ + ROCPROFSYS_VERBOSE(3, "Getting agent for type: %s, with type index: %ld\n", + (type == agent_type::GPU) ? "GPU" : "CPU", type_index); + auto _agent = + std::find_if(_agents.begin(), _agents.end(), [&](const auto& agent_ptr) { + return agent_ptr->type == type && agent_ptr->device_type_index == type_index; + }); + if(_agent == _agents.end()) + { + std::ostringstream oss; + oss << "Agent not found for type index: " << type_index + << ", type: " << (type == agent_type::GPU ? "GPU" : "CPU"); + throw std::out_of_range(oss.str()); + } + return **_agent; +} + const agent& agent_manager::get_agent_by_id(size_t device_id, agent_type type) const { @@ -75,7 +94,7 @@ agent_manager::get_agent_by_handle(uint64_t device_handle, agent_type type) cons device_handle, (type == agent_type::GPU ? "GPU" : "CPU")); auto _agent = std::find_if(_agents.begin(), _agents.end(), [&](const auto& agent_ptr) { - return agent_ptr->type == type && agent_ptr->id == device_handle; + return agent_ptr->type == type && agent_ptr->handle == device_handle; }); if(_agent == _agents.end()) { @@ -93,7 +112,7 @@ agent_manager::get_agent_by_handle(size_t device_handle) const ROCPROFSYS_VERBOSE(3, "Getting agent for device handle: %ld\n", device_handle); auto _agent = std::find_if(_agents.begin(), _agents.end(), [&](const auto& agent_ptr) { - return agent_ptr->id == device_handle; + return agent_ptr->handle == device_handle; }); if(_agent == _agents.end()) { diff --git a/projects/rocprofiler-systems/source/lib/core/agent_manager.hpp b/projects/rocprofiler-systems/source/lib/core/agent_manager.hpp index d49305ad51..ee3b556514 100644 --- a/projects/rocprofiler-systems/source/lib/core/agent_manager.hpp +++ b/projects/rocprofiler-systems/source/lib/core/agent_manager.hpp @@ -41,8 +41,9 @@ struct agent_manager ~agent_manager() = default; void insert_agent(agent& agent); + const agent& get_agent_by_type_index(size_t type_index, agent_type type) const; const agent& get_agent_by_id(size_t device_id, agent_type type) const; - const agent& get_agent_by_handle(size_t device_id, agent_type type) const; + const agent& get_agent_by_handle(size_t device_handle, agent_type type) const; const agent& get_agent_by_handle(size_t device_handle) const; std::vector> get_agents_by_type(agent_type type) const; diff --git a/projects/rocprofiler-systems/source/lib/core/benchmark/benchmark.hpp b/projects/rocprofiler-systems/source/lib/core/benchmark/benchmark.hpp deleted file mode 100644 index d8173e2098..0000000000 --- a/projects/rocprofiler-systems/source/lib/core/benchmark/benchmark.hpp +++ /dev/null @@ -1,353 +0,0 @@ -// MIT License -// -// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "core/benchmark/category.hpp" -#include "core/debug.hpp" - -namespace rocprofsys -{ -namespace benchmark -{ -namespace -{ -template -struct benchmark_impl -{ - template - struct scope - { - scope(const scope&) = delete; - scope& operator=(const scope&) = delete; - ~scope() = default; - - protected: - scope() = default; - scope(scope&&) = default; - scope& operator=(scope&&) = default; - }; - - template - static void start() - {} - - template - static void end() - {} - - template - [[nodiscard]] static scope scoped_trace() - { - return scope{}; - } - - static void init_from_env(const char* = nullptr) {} - static void show_results() {} -}; - -using tid_t = __pid_t; -struct indexed_category -{ - size_t category; - tid_t thread_id; - - friend bool operator==(const indexed_category& lhs, const indexed_category& rhs) - { - return lhs.category == rhs.category && lhs.thread_id == rhs.thread_id; - } -}; - -struct indexed_category_hash -{ - size_t operator()(const indexed_category& p) const noexcept - { - std::size_t hash1 = std::hash{}(p.category); - std::size_t hash2 = std::hash{}(p.thread_id); - return hash1 ^ (hash2 << 1); - } -}; - -template -struct benchmark_impl -{ - static_assert(std::is_enum_v, "category_enum must be an enum"); - -public: - using clock = std::chrono::high_resolution_clock; - using time_point = clock::time_point; - static constexpr size_t _max_categories = static_cast(category_enum::count); - - template - struct scope - { - friend benchmark_impl; - - public: - scope(const scope&) = delete; - scope& operator=(const scope&) = delete; - ~scope() { end(); } - - protected: - scope() { start(); } - - scope(scope&&) = default; - scope& operator=(scope&&) = default; - }; - - template - static void start() - { - static const thread_local auto _thread_id = gettid(); - const auto now = clock::now(); - std::lock_guard lock(m_mutex); - (..., (is_category_defined([&] { - if(m_enabled.test(to_index(categories))) - m_started[{ to_index(categories), _thread_id }] = now; - }))); - } - - template - static void end() - { - static const thread_local auto _thread_id = getpid(); - const auto _end_time = clock::now(); - std::lock_guard lock(m_mutex); - (..., (is_category_defined([&] { - if(m_enabled.test(to_index(categories))) - end_category(_end_time, categories, _thread_id); - }))); - } - - template - [[nodiscard]] static scope scoped_trace() - { - return scope{}; - } - - static void init_from_env(const char* envVar = "ROCPROFSYS_BENCHMARK_CATEGORIES") - { - std::lock_guard lock(m_mutex); - const auto* env = std::getenv(envVar); - if(env == nullptr || std::string(env).empty()) - { - ROCPROFSYS_WARNING(1, "No BENCHMARK categories specified in environment " - "variable ROCPROFSYS_BENCHMARK_CATEGORIES.\n"); - return; - } - std::string _str(env); - std::istringstream ss(_str); - std::string token; - - while(std::getline(ss, token, ',')) - { - token.erase(0, token.find_first_not_of(" \t")); - token.erase(token.find_last_not_of(" \t") + 1); - for(category_enum cat : compiledCategories) - { - if(to_string(cat) == token) - { - m_enabled.set(to_index(cat)); - } - } - } - } - - static void show_results() - { - std::lock_guard lock(m_mutex); - std::vector> sorted; - - for(category_enum cat : compiledCategories) - { - const auto& data = m_results[to_index(cat)]; - if(data.count > 0) - { - sorted.emplace_back(cat, data); - } - } - - std::sort(sorted.begin(), sorted.end(), [](const auto& a, const auto& b) { - return a.second.total_time > b.second.total_time; - }); - - constexpr uint32_t _category = 30; - constexpr uint32_t _calls = 8; - constexpr uint32_t _total = 12; - constexpr uint32_t _avg = 10; - constexpr uint32_t _min = 10; - constexpr uint32_t _max = 10; - - std::cout << "\033[32m" - << std::string(_category + _calls + _total + _avg + _min + _max, '=') - << "\n"; - std::cout << "Benchmark Results (Sorted by Total Time):\n"; - std::cout << std::string(_category + _calls + _total + _avg + _min + _max, '-') - << "\n"; - std::cout << std::left << std::setw(_category) << "Category" << std::right - << std::setw(_calls) << "Calls" << std::setw(_total) << "Total(ms)" - << std::setw(_avg) << "Avg(us)" << std::setw(_min) << "Min(us)" - << std::setw(_max) << "Max(us)" << "\n"; - - std::cout << std::string(_category + _calls + _total + _avg + _min + _max, '-') - << "\n"; - - for(const auto& [cat, data] : sorted) - { - double totalMs = static_cast(data.total_time) / 1000.0; - double avgUs = static_cast(data.total_time) / data.count; - - std::cout << std::left << std::setw(_category) << to_string(cat) << std::right - << std::setw(_calls) << data.count << std::setw(_total) - << std::fixed << std::setprecision(3) << totalMs << std::setw(_avg) - << std::fixed << std::setprecision(1) << avgUs << std::setw(_min) - << data.min_time << std::setw(_max) << data.max_time << "\n"; - } - - std::cout << std::string(_category + _calls + _total + _avg + _min + _max, '=') - << "\033[0m" << "\n\n"; - } - -private: - struct result_data - { - uint64_t total_time = 0; - size_t count = 0; - uint64_t min_time = std::numeric_limits::max(); - uint64_t max_time = std::numeric_limits::min(); - - void update(uint64_t duration) - { - total_time += duration; - count += 1; - if(duration < min_time) min_time = duration; - if(duration > max_time) max_time = duration; - } - }; - - static constexpr size_t to_index(category_enum cat) - { - return static_cast(cat); - } - - static void end_category(const time_point& end_time, category_enum cat, - const tid_t thread_id) - { - const size_t _idx = to_index(cat); - auto _it = m_started.find({ _idx, thread_id }); - if(_it == m_started.end()) - { - ROCPROFSYS_WARNING(1, "Benchmark error: missing start time for category!\n"); - return; - } - - auto duration = - std::chrono::duration_cast(end_time - _it->second) - .count(); - m_started.erase(_it); - m_results[_idx].update(duration); - } - - template - static constexpr void is_category_defined(Func&& f) - { - if constexpr(((Cat == enabled_categories) || ...)) - { - f(); - } - } - - static constexpr std::array - compiledCategories = { enabled_categories... }; - - static inline std::unordered_map - m_started; - static inline std::array m_results{}; - static inline std::bitset<_max_categories> m_enabled; - static inline std::mutex m_mutex; -}; - -#ifdef ROCPROFSYS_ENABLE_BENCHMARK -using _benchmark_impl = benchmark::benchmark_impl< - static_cast(ROCPROFSYS_ENABLE_BENCHMARK), benchmark::category, - benchmark::category::kernel_dispatch, benchmark::category::memory_copy, - benchmark::category::memory_allocate, benchmark::category::db_entry_kernel_dispatch, - benchmark::category::db_entry_memory_copy, - benchmark::category::db_entry_memory_allocate, - benchmark::category::perfetto_kernel_dispatch, - benchmark::category::sdk_tool_buffered_tracing>; -#else -using _benchmark_impl = benchmark::benchmark_impl; -#endif -} // namespace - -template -void -start() -{ - _benchmark_impl::template start(); -} - -template -void -end() -{ - _benchmark_impl::template end(); -} - -template -[[nodiscard]] auto -scoped_trace() -{ - return _benchmark_impl::template scoped_trace(); -} - -inline void -init_from_env(const char* envVar = "BENCHMARK_CATEGORIES") -{ - _benchmark_impl::init_from_env(envVar); -} - -inline void -show_results() -{ - _benchmark_impl::show_results(); -} - -} // namespace benchmark -} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/benchmark/category.hpp b/projects/rocprofiler-systems/source/lib/core/benchmark/category.hpp deleted file mode 100644 index 61afe08e08..0000000000 --- a/projects/rocprofiler-systems/source/lib/core/benchmark/category.hpp +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2018-2025 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// with the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// * Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimers. -// -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimers in the -// documentation and/or other materials provided with the distribution. -// -// * Neither the names of Advanced Micro Devices, Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this Software without specific prior written permission. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH -// THE SOFTWARE. - -#pragma once - -#include - -namespace rocprofsys -{ -namespace benchmark -{ -enum class category -{ - kernel_dispatch, - db_entry_kernel_dispatch, - memory_copy, - db_entry_memory_copy, - memory_allocate, - db_entry_memory_allocate, - perfetto_kernel_dispatch, - sdk_tool_buffered_tracing, - count -}; - -constexpr std::string_view -to_string(category cat) -{ - switch(cat) - { - case category::kernel_dispatch: return "kernel_dispatch"; - case category::db_entry_kernel_dispatch: return "db_entry_kernel_dispatch"; - case category::memory_copy: return "memory_copy"; - case category::memory_allocate: return "memory_allocate"; - case category::db_entry_memory_copy: return "db_entry_memory_copy"; - case category::db_entry_memory_allocate: return "db_entry_memory_allocate"; - case category::perfetto_kernel_dispatch: return "perfetto_kernel_dispatch"; - case category::sdk_tool_buffered_tracing: return "sdk_tool_buffered_tracing"; - default: return "unknown"; - } -} - -} // namespace benchmark -} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/categories.hpp b/projects/rocprofiler-systems/source/lib/core/categories.hpp index 57326e2b66..4cb45df5eb 100644 --- a/projects/rocprofiler-systems/source/lib/core/categories.hpp +++ b/projects/rocprofiler-systems/source/lib/core/categories.hpp @@ -96,6 +96,7 @@ ROCPROFSYS_DEFINE_CATEGORY(category, rocm_hip_api, ROCPROFSYS_CATEGORY_ROCM_HIP_ ROCPROFSYS_DEFINE_CATEGORY(category, rocm_hsa_api, ROCPROFSYS_CATEGORY_ROCM_HSA_API, "rocm_hsa_api", "ROCm HSA functions") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_kernel_dispatch, ROCPROFSYS_CATEGORY_ROCM_KERNEL_DISPATCH, "rocm_kernel_dispatch", "ROCm Kernel dispatch") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_memory_copy, ROCPROFSYS_CATEGORY_ROCM_MEMORY_COPY, "rocm_memory_copy", "ROCm Async Memory Copy") +ROCPROFSYS_DEFINE_CATEGORY(category, rocm_memory_allocate, ROCPROFSYS_CATEGORY_ROCM_MEMORY_ALLOCATE, "rocm_memory_allocate", "ROCm Memory Allocations") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_hip_stream, ROCPROFSYS_CATEGORY_ROCM_HIP_STREAM, "rocm_hip_stream", "ROCm HIP Stream") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_scratch_memory, ROCPROFSYS_CATEGORY_ROCM_SCRATCH_MEMORY, "rocm_scratch_memory", "ROCm kernel scratch memory reallocations") ROCPROFSYS_DEFINE_CATEGORY(category, rocm_page_migration, ROCPROFSYS_CATEGORY_ROCM_PAGE_MIGRATION, "rocm_page_migration", "ROCm memory page migration") @@ -167,6 +168,7 @@ using name = perfetto_category; ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_hsa_api), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_kernel_dispatch), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_memory_copy), \ + ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_memory_allocate), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_hip_stream), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_scratch_memory), \ ROCPROFSYS_PERFETTO_CATEGORY(category::rocm_page_migration), \ diff --git a/projects/rocprofiler-systems/source/lib/core/cpu.cpp b/projects/rocprofiler-systems/source/lib/core/cpu.cpp index 5c63dacfed..2a484eccfd 100644 --- a/projects/rocprofiler-systems/source/lib/core/cpu.cpp +++ b/projects/rocprofiler-systems/source/lib/core/cpu.cpp @@ -1,3 +1,25 @@ +// MIT License +// +// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + #include "cpu.hpp" #include "agent_manager.hpp" @@ -153,6 +175,7 @@ query_cpu_agents() auto logical_id = id_count++; auto id = cpu_count++; auto cur_agent = agent{ agent_type::CPU, + 0, id, node_id, logical_id, diff --git a/projects/rocprofiler-systems/source/lib/core/gpu.cpp b/projects/rocprofiler-systems/source/lib/core/gpu.cpp index 18281448c9..e35e15312f 100644 --- a/projects/rocprofiler-systems/source/lib/core/gpu.cpp +++ b/projects/rocprofiler-systems/source/lib/core/gpu.cpp @@ -124,19 +124,20 @@ query_rocm_agents() auto& _agent_manager = agent_manager::get_instance(); for(size_t i = 0; i < num_agents; ++i) { - const auto* _agent = static_cast(agents[i]); - auto cur_agent = agent{ + const auto* _agent = static_cast(agents[i]); + agent cur_agent; + cur_agent.type = (_agent->type == ROCPROFILER_AGENT_TYPE_GPU ? agent_type::GPU - : agent_type::CPU), - _agent->device_id, - _agent->node_id, - _agent->logical_node_id, - _agent->logical_node_type_id, - std::string(_agent->name), - std::string(_agent->vendor_name), - std::string(_agent->product_name), - std::string(_agent->model_name), - }; + : agent_type::CPU); + cur_agent.handle = _agent->id.handle; + cur_agent.device_id = _agent->device_id; + cur_agent.node_id = _agent->node_id; + cur_agent.logical_node_id = _agent->logical_node_id; + cur_agent.logical_node_type_id = _agent->logical_node_type_id; + cur_agent.name = std::string(_agent->name); + cur_agent.model_name = std::string(_agent->model_name); + cur_agent.vendor_name = std::string(_agent->vendor_name); + cur_agent.product_name = std::string(_agent->product_name); _agent_manager.insert_agent(cur_agent); } return ROCPROFILER_STATUS_SUCCESS; diff --git a/projects/rocprofiler-systems/source/lib/core/rocpd/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/core/rocpd/CMakeLists.txt index f6a6576383..db789f7254 100644 --- a/projects/rocprofiler-systems/source/lib/core/rocpd/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/lib/core/rocpd/CMakeLists.txt @@ -1,3 +1,25 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + set(rocpd_sources ${CMAKE_CURRENT_LIST_DIR}/data_processor.cpp ${CMAKE_CURRENT_LIST_DIR}/json.cpp diff --git a/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.cpp b/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.cpp index 6198aa047c..b1ab7cc350 100644 --- a/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.cpp +++ b/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.cpp @@ -69,8 +69,7 @@ data_processor::initialize_metadata() size_t data_processor::insert_string(const char* str) { - std::lock_guard lock(_data_mutex); - auto it = _string_map.find(str); + auto it = _string_map.find(str); if(it != _string_map.end()) return _string_map.at(str); data_storage::queries::table_insert_query query; @@ -242,24 +241,12 @@ data_processor::insert_sample(const char* track, uint64_t timestamp, size_t even } size_t -data_processor::insert_event(size_t category_id, size_t stack_id, size_t parent_stack_id, - size_t correlation_id, const char* call_stack, - const char* line_info, const char* extdata) +data_processor::insert_event(size_t string_primary_key, size_t stack_id, + size_t parent_stack_id, size_t correlation_id, + const char* call_stack, const char* line_info, + const char* extdata) { - std::lock_guard lock(_data_mutex); - auto it = _category_map.find(category_id); - if(it == _category_map.end()) - { - std::ostringstream oss; - oss << "Insert event failed! Error: Unknown category id: " << category_id - << " for UPID: " << _upid; - throw std::runtime_error(oss.str()); - } - - ROCPROFSYS_VERBOSE(3, "Insert event category id: %ld, string id: %ld\n", category_id, - it->second); - - _insert_event_statement(_upid.c_str(), it->second, stack_id, parent_stack_id, + _insert_event_statement(_upid.c_str(), string_primary_key, stack_id, parent_stack_id, correlation_id, call_stack, line_info, extdata); return data_storage::database::get_instance().get_last_insert_id(); } @@ -456,7 +443,6 @@ void data_processor::insert_args(size_t event_id, size_t position, const char* type, const char* name, const char* value, const char* extdata) { - std::lock_guard lock(_data_mutex); _insert_args_statement(_upid.c_str(), event_id, position, type, name, value, extdata); } @@ -464,40 +450,24 @@ void data_processor::insert_stream_info(size_t stream_id, size_t node_id, size_t process_id, const char* name, const char* extdata) { - if(_stream_ids.count(stream_id) > 0) - { - // ROCPROFSYS_WARNING( - // 1, "Insert stream info failed! Error: Stream ID %ld already exists!\n", - // stream_id); - return; - } data_storage::queries::table_insert_query query; data_storage::database::get_instance().execute_query( query.set_table_name("rocpd_info_stream_" + _upid) .set_columns("id", "guid", "nid", "pid", "name", "extdata") .set_values(stream_id, _upid, node_id, process_id, name, extdata) .get_query_string()); - _stream_ids.insert(stream_id); } void data_processor::insert_queue_info(size_t queue_id, size_t node_id, size_t process_id, const char* name, const char* extdata) { - if(_queue_ids.count(queue_id) > 0) - { - // ROCPROFSYS_WARNING( - // 1, "Insert queue info failed! Error: Queue ID %ld already exists!\n", - // queue_id); - return; - } data_storage::queries::table_insert_query query; data_storage::database::get_instance().execute_query( query.set_table_name("rocpd_info_queue_" + _upid) .set_columns("id", "guid", "nid", "pid", "name", "extdata") .set_values(queue_id, _upid, node_id, process_id, name, extdata) .get_query_string()); - _queue_ids.insert(queue_id); } void @@ -506,20 +476,9 @@ data_processor::insert_code_object(size_t id, size_t node_id, size_t process_id, uint64_t ld_size, uint64_t ld_delta, const char* storage_type, const char* extdata) { - if(_code_object_ids.count(id) > 0) - { - // ROCPROFSYS_WARNING( - // 1, - // "Insert code object info failed! Error: Code object ID %ld already - // exists!\n", id); - return; - } ROCPROFSYS_VERBOSE(2, "Insert code object with ID: %ld\n", id); - std::lock_guard lock(_data_mutex); _insert_code_object_statement(id, _upid.c_str(), node_id, process_id, agent_id, uri, ld_base, ld_size, ld_delta, storage_type, extdata); - - _code_object_ids.insert(id); } void @@ -530,40 +489,11 @@ data_processor::insert_kernel_symbol( uint32_t private_segment_size, uint32_t sgrp_count, uint32_t arch_vgrp_count, uint32_t accum_vgrp_count, const char* extdata) { - if(_kernel_sym_ids.count(id) > 0) - { - // ROCPROFSYS_WARNING( - // 1, - // "Insert kernel symbol failed! Error: Kernel symbol ID %ld already - // exists!\n", id); - return; - } - ROCPROFSYS_VERBOSE(2, "Insert kernel symbol: %s with ID: %ld\n", name, id); - std::lock_guard lock(_data_mutex); _insert_kernel_symbol_statement( id, _upid.c_str(), node_id, process_id, code_obj_id, name, display_name, kernel_obj, kernarg_segmnt_size, kernarg_segment_alignment, group_segment_size, private_segment_size, sgrp_count, arch_vgrp_count, accum_vgrp_count, extdata); - - _kernel_sym_ids.insert(id); -} - -void -data_processor::insert_category(size_t category_id, const char* name) -{ - auto it = _category_map.find(category_id); - if(it != _category_map.end()) - { - // ROCPROFSYS_WARNING( - // 1, "Insert category failed! Error: Category %s already exist!\n", name); - return; - } - auto name_id = insert_string(name); - std::lock_guard lock(_data_mutex); - ROCPROFSYS_VERBOSE(2, "Insert category: name: %s, id: %ld, name id: %ld\n", name, - category_id, name_id); - _category_map.emplace(category_id, name_id); } void @@ -571,7 +501,6 @@ data_processor::insert_region(size_t node_id, size_t process_id, size_t thread_i uint64_t start, uint64_t end, size_t name_id, size_t event_id, const char* extdata) { - std::lock_guard lock(_data_mutex); ROCPROFSYS_VERBOSE(2, "Insert region for event id: %ld\n", event_id); _insert_region_statement(_upid.c_str(), node_id, process_id, thread_id, start, end, @@ -587,8 +516,6 @@ data_processor::insert_kernel_dispatch( size_t grid_size_x, size_t grid_size_y, size_t grid_size_z, size_t region_name_id, size_t event_id, const char* extdata) { - std::lock_guard lock(_data_mutex); - ROCPROFSYS_VERBOSE(2, "Insert kernel dispatch for event id: %ld\n", event_id); _insert_kernel_dispatch_statement( @@ -607,8 +534,6 @@ data_processor::insert_memory_copy(size_t node_id, size_t process_id, size_t thr size_t region_name_id, size_t event_id, const char* extdata) { - std::lock_guard lock(_data_mutex); - _insert_memory_copy_statement(_upid.c_str(), node_id, process_id, thread_id, start, end, name_id, dst_agent_id, dst_addr, src_agent_id, src_addr, size, queue_id, stream_id, region_name_id, @@ -663,6 +588,18 @@ data_processor::insert_thread_info(size_t node_id, size_t parent_process_id, return thread_idx; } +size_t +data_processor::map_thread_id_to_primary_key(size_t thread_id) +{ + auto it = _thread_id_map.find(thread_id); + + if(it == _thread_id_map.end()) + { + throw std::invalid_argument("Given thread id don't exist"); + } + return _thread_id_map.at(thread_id); +} + void data_processor::flush() { diff --git a/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.hpp b/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.hpp index 0e4782dbb8..0eeb9a533f 100644 --- a/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.hpp +++ b/projects/rocprofiler-systems/source/lib/core/rocpd/data_processor.hpp @@ -39,7 +39,7 @@ struct data_processor using insert_event_stmt = std::function; - using insert_pmc_event_stms = + using insert_pmc_event_stmt = std::function; using insert_sample_stmt = std::function; @@ -124,9 +124,10 @@ public: void insert_track(const char* track_name, size_t node_id, size_t process_id, std::optional thread_id, const char* extdata = "{}"); - size_t insert_event(size_t category_id, size_t stack_id, size_t parent_stack_id, - size_t correlation_id, const char* call_stack = "{}", - const char* line_info = "{}", const char* extdata = "{}"); + size_t insert_event(size_t string_primary_key, size_t stack_id, + size_t parent_stack_id, size_t correlation_id, + const char* call_stack = "{}", const char* line_info = "{}", + const char* extdata = "{}"); void insert_pmc_event(size_t event_id, size_t agent_id, const char* pmc_descriptor, double value, const char* extdata = "{}"); @@ -143,8 +144,6 @@ public: void insert_sample(const char* track, uint64_t timestamp, size_t event_id, const char* extdata = "{}"); - void insert_category(size_t category_id, const char* name); - void insert_region(size_t node_id, size_t process_id, size_t thread_id, uint64_t start, uint64_t end, size_t name_id, size_t event_id, const char* extdata = "{}"); @@ -199,6 +198,8 @@ public: size_t stream_id, size_t event_id, const char* extdata = "{}"); + size_t map_thread_id_to_primary_key(size_t thread_id); + void flush(); private: @@ -223,16 +224,10 @@ private: std::unordered_map _pmc_descriptor_map; std::unordered_map _thread_id_map; - std::unordered_map _category_map; std::unordered_map _string_map; - std::set _code_object_ids; - std::set _kernel_sym_ids; - std::set _stream_ids; - std::set _queue_ids; - insert_event_stmt _insert_event_statement; - insert_pmc_event_stms _insert_pmc_event_statement; + insert_pmc_event_stmt _insert_pmc_event_statement; insert_sample_stmt _insert_sample_statement; insert_region_stmt _insert_region_statement; insert_kernel_dispatch_stmt _insert_kernel_dispatch_statement; @@ -244,8 +239,6 @@ private: insert_memory_alloc_no_agent_stmt _insert_memory_alloc_no_agent_statement; std::string _upid{}; - - std::mutex _data_mutex; }; } // namespace rocpd diff --git a/projects/rocprofiler-systems/source/lib/core/rocpd/data_storage/database.cpp b/projects/rocprofiler-systems/source/lib/core/rocpd/data_storage/database.cpp index d0b6f58778..9cd99925ff 100644 --- a/projects/rocprofiler-systems/source/lib/core/rocpd/data_storage/database.cpp +++ b/projects/rocprofiler-systems/source/lib/core/rocpd/data_storage/database.cpp @@ -93,9 +93,8 @@ database::initialize_schema() return new_file_path; } } - return std::string( - "rocprofiler-systems/source/lib/core/rocpd/data_storage/schema/") - .append(filename); + // TODO: Update to look for the system's rocpd schema + return std::string("source/lib/core/rocpd/data_storage/schema/").append(filename); }; std::vector schema_files = { "rocpd_tables.sql", "rocpd_views.sql", @@ -118,9 +117,11 @@ database::initialize_schema() std::string query = ss_query.str(); std::regex upid_pattern("\\{\\{uuid\\}\\}"); + std::regex guid_pattern("\\{\\{guid\\}\\}"); std::regex view_upid_pattern("\\{\\{view_upid\\}\\}"); query = std::regex_replace(query, upid_pattern, "_" + get_upid()); + query = std::regex_replace(query, guid_pattern, get_upid()); query = std::regex_replace(query, view_upid_pattern, ""); validate_sqlite3_result( diff --git a/projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.cpp b/projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.cpp index c889fc2526..bfe786920b 100644 --- a/projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.cpp +++ b/projects/rocprofiler-systems/source/lib/core/rocprofiler-sdk.cpp @@ -495,6 +495,9 @@ get_buffered_domains() const auto supported = std::unordered_set{ ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, +# if(ROCPROFILER_VERSION >= 600) + ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION, +# endif # if(ROCPROFILER_VERSION < 10000) ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, # endif @@ -540,6 +543,16 @@ get_buffered_domains() { _data.emplace(ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API); } +# if(ROCPROFILER_VERSION >= 600) + else if(itr == "memory_allocation") + { + _data.emplace(ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION); + } +# endif + else if(itr == "memory_copy") + { + _data.emplace(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY); + } else { for(size_t idx = 0; idx < buffer_tracing_info.size(); ++idx) diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/core/trace_cache/CMakeLists.txt new file mode 100644 index 0000000000..28ef1eccaf --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/CMakeLists.txt @@ -0,0 +1,44 @@ +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +set(trace_cache_sources + ${CMAKE_CURRENT_LIST_DIR}/cache_manager.cpp + ${CMAKE_CURRENT_LIST_DIR}/storage_parser.cpp + ${CMAKE_CURRENT_LIST_DIR}/buffer_storage.cpp + ${CMAKE_CURRENT_LIST_DIR}/metadata_registry.cpp + ${CMAKE_CURRENT_LIST_DIR}/rocpd_post_processing.cpp +) + +set(trace_cache_headers + ${CMAKE_CURRENT_LIST_DIR}/cache_manager.hpp + ${CMAKE_CURRENT_LIST_DIR}/storage_parser.hpp + ${CMAKE_CURRENT_LIST_DIR}/buffer_storage.hpp + ${CMAKE_CURRENT_LIST_DIR}/cache_utility.hpp + ${CMAKE_CURRENT_LIST_DIR}/metadata_registry.hpp + ${CMAKE_CURRENT_LIST_DIR}/rocpd_post_processing.hpp + ${CMAKE_CURRENT_LIST_DIR}/sample_type.hpp +) + +target_sources( + rocprofiler-systems-core-library + PRIVATE ${trace_cache_sources} ${trace_cache_headers} +) diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.cpp new file mode 100644 index 0000000000..b463835fc2 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.cpp @@ -0,0 +1,178 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "buffer_storage.hpp" +#include "PTL/Task.hh" +#include "PTL/TaskGroup.hh" +#include "PTL/ThreadPool.hh" +#include "debug.hpp" +#include "library/runtime.hpp" +#include +#include +#include +#include +#include + +using namespace std::chrono_literals; + +namespace rocprofsys +{ +namespace trace_cache +{ + +namespace +{ +constexpr auto CACHE_FILE_FLUSH_TIMEOUT = 10ms; +constexpr auto NUM_OF_THREADS = 1; +} // namespace + +buffer_storage::buffer_storage(pid_t _pid) +{ + ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false); + m_thread_pool = std::make_unique(NUM_OF_THREADS); + m_thread_pool->initialize_threadpool(NUM_OF_THREADS); + + m_task_group = std::make_unique>(m_thread_pool.get()); + m_task_group->exec([this, _pid]() { + std::ofstream _ofs(filename, std::ios::binary | std::ios::out); + + if(!_ofs) + { + std::stringstream _ss; + _ss << "Error opening file for writing: " << filename; + throw std::runtime_error(_ss.str()); + } + + auto execute_flush = [&](std::ofstream& ofs, bool force = false) { + size_t _head, _tail; + { + std::lock_guard guard{ m_mutex }; + _head = m_head; + _tail = m_tail; + + if(_head == _tail) + { + return; + } + + auto used_space = + m_head > m_tail ? (m_head - m_tail) : (buffer_size - m_tail + m_head); + if(!force && used_space < flush_threshold) + { + return; + } + m_tail = m_head; + } + + if(_head > _tail) + { + ofs.write(reinterpret_cast(m_buffer->data() + _tail), + _head - _tail); + } + else + { + ofs.write(reinterpret_cast(m_buffer->data() + _tail), + buffer_size - _tail); + ofs.write(reinterpret_cast(m_buffer->data()), _head); + } + }; + + ROCPROFSYS_DEBUG("Starting buffered storage flushing thread for pid %d", + static_cast(_pid)); + m_created_process = _pid; + std::mutex _shutdown_condition_mutex; + while(m_running) + { + execute_flush(_ofs); + std::unique_lock _lock{ _shutdown_condition_mutex }; + m_shutdown_condition.wait_for( + _lock, std::chrono::milliseconds(CACHE_FILE_FLUSH_TIMEOUT), + [&]() { return !m_running; }); + } + + execute_flush(_ofs, true); + _ofs.close(); + m_exit_finished = true; + m_exit_condition.notify_one(); + }); +} + +void +buffer_storage::shutdown() +{ + ROCPROFSYS_DEBUG("Buffer storage shutting down.."); + m_running = false; + m_shutdown_condition.notify_all(); + + if(m_created_process != getpid()) + { + ROCPROFSYS_DEBUG( + "Buffer storage is not created in same process as shutting down.."); + return; + } + + std::mutex _exit_mutex; + std::unique_lock _exit_lock{ _exit_mutex }; + m_exit_condition.wait(_exit_lock, [&]() { return m_exit_finished; }); + m_thread_pool->destroy_threadpool(); +} + +void +buffer_storage::fragment_memory() +{ + auto* _data = m_buffer->data(); + memset(_data + m_head, 0xFFFF, buffer_size - m_head); + *reinterpret_cast(_data + m_head) = entry_type::fragmented_space; + + size_t remaining_bytes = buffer_size - m_head - minimal_fragmented_memory_size; + *reinterpret_cast(_data + m_head + sizeof(entry_type)) = remaining_bytes; + m_head = 0; +} + +uint8_t* +buffer_storage::reserve_memory_space(size_t len) +{ + size_t _size; + { + std::lock_guard scope{ m_mutex }; + + if((m_head + len + minimal_fragmented_memory_size) > buffer_size) + { + fragment_memory(); + } + _size = m_head; + m_head = m_head + len; + } + + auto* _result = m_buffer->data() + _size; + memset(_result, 0, len); + return _result; +} + +bool +buffer_storage::is_running() const +{ + return m_running; +} + +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.hpp new file mode 100644 index 0000000000..1996edbe6d --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.hpp @@ -0,0 +1,162 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "PTL/TaskGroup.hh" +#include "PTL/ThreadPool.hh" +#include "cache_utility.hpp" +#include "sample_type.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace rocprofsys +{ +namespace trace_cache +{ + +class cache_manager; +class buffer_storage +{ +public: + static buffer_storage& get_instance(); + + template + void store(entry_type type, T&&... values) + { + if(!is_running()) + { + throw std::runtime_error( + "Trying to use buffered storage while it is not running"); + return; + } + + constexpr bool is_supported_type = (supported_types::is_supported && ...); + static_assert(is_supported_type, "Supported types are const char*, char*, " + "unsigned long, unsigned int, and int."); + + auto arg_size = get_size(values...); + auto total_size = arg_size + sizeof(type) + sizeof(size_t); + auto* reserved_memory = reserve_memory_space(total_size); + size_t position = 0; + + auto store_value = [&](const auto& val) { + using Type = decltype(val); + size_t len = 0; + auto* dest = reserved_memory + position; + if constexpr(std::is_same_v, const char*>) + { + len = strlen(val) + 1; + std::memcpy(dest, val, len); + } + else + { + using ClearType = std::decay_t; + len = sizeof(ClearType); + *reinterpret_cast(dest) = val; + } + position += len; + }; + + store_value(type); + store_value(arg_size); + + (store_value(values), ...); + } + +private: + friend class cache_manager; + buffer_storage(pid_t _pid); + void shutdown(); + bool is_running() const; + void fragment_memory(); + uint8_t* reserve_memory_space(size_t len); + + template + struct typelist + { + template + constexpr static bool is_supported = + (std::is_same_v, Types> || ...); + }; + + using supported_types = typelist; + + template + static constexpr bool is_string_literal_v = + std::is_same_v, const char*> || + std::is_same_v, char*>; + + template + constexpr size_t get_size_impl(T&& val) + { + if constexpr(is_string_literal_v) + { + size_t size = 0; + while(val[size] != '\0') + { + size++; + } + return ++size; + } + else + { + return sizeof(T); + } + } + + template + constexpr size_t get_size(T&&... val) + { + auto total_size = 0; + ((total_size += get_size_impl(val)), ...); + return total_size; + } + +private: + std::mutex m_mutex; + std::condition_variable m_exit_condition; + bool m_exit_finished{ false }; + bool m_running{ true }; + std::condition_variable m_shutdown_condition; + + std::unique_ptrm_thread_pool; + std::unique_ptr> m_task_group; + size_t m_head{ 0 }; + size_t m_tail{ 0 }; + std::unique_ptr m_buffer{ std::make_unique() }; + pid_t m_created_process; +}; + +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp new file mode 100644 index 0000000000..dbecaf01ca --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp @@ -0,0 +1,79 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "cache_manager.hpp" +#include "core/config.hpp" +#include "core/trace_cache/storage_parser.hpp" +#include "debug.hpp" +#include "trace_cache/rocpd_post_processing.hpp" + +namespace rocprofsys +{ +namespace trace_cache +{ + +cache_manager& +cache_manager::get_instance() +{ + static cache_manager instance; + return instance; +} + +cache_manager::cache_manager() +: m_postprocessing{ m_metadata } +{ + m_postprocessing.register_parser_callback(m_parser); +} + +void +cache_manager::post_process() +{ + if(m_storage.is_running()) + { + ROCPROFSYS_WARNING(2, "Postprocessing called without previously shutting down " + "cache storage. Calling shutdown explicitly..\n"); + shutdown(); + } + + if(get_use_rocpd()) + { + ROCPROFSYS_PRINT( + "Generating rocpd with collected data. This may take a while..\n"); + } + post_process_metadata(); + m_parser.consume_storage(); +} + +void +cache_manager::post_process_metadata() +{ + m_postprocessing.post_process_metadata(); +} + +void +cache_manager::shutdown() +{ + m_storage.shutdown(); +} + +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.hpp new file mode 100644 index 0000000000..acbc447a34 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.hpp @@ -0,0 +1,67 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "buffer_storage.hpp" +#include "core/trace_cache/rocpd_post_processing.hpp" +#include "metadata_registry.hpp" +#include "storage_parser.hpp" + +namespace rocprofsys +{ +namespace trace_cache +{ + +class cache_manager +{ +public: + static cache_manager& get_instance(); + buffer_storage& get_buffer_storage() { return m_storage; } + metadata_registry& get_metadata_regsitry() { return m_metadata; } + void shutdown(); + void post_process(); + +private: + void post_process_metadata(); + cache_manager(); + + buffer_storage m_storage{ getpid() }; + metadata_registry m_metadata; + storage_parser m_parser{ getpid() }; + rocpd_post_processing m_postprocessing; +}; + +inline metadata_registry& +get_metadata_registry() +{ + return cache_manager::get_instance().get_metadata_regsitry(); +} + +inline buffer_storage& +get_buffer_storage() +{ + return cache_manager::get_instance().get_buffer_storage(); +} + +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_utility.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_utility.hpp new file mode 100644 index 0000000000..d59cf8c3e5 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_utility.hpp @@ -0,0 +1,45 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once +#include "sample_type.hpp" +#include +#include +#include +#include + +namespace rocprofsys +{ +namespace trace_cache +{ +constexpr size_t buffer_size = 100 * tim::units::megabyte; +constexpr size_t flush_threshold = 80 * tim::units::megabyte; +const auto filename = "/tmp/buffered_storage_" + std::to_string(getpid()) + ".bin"; + +constexpr size_t minimal_fragmented_memory_size = sizeof(entry_type) + sizeof(size_t); +using buffer_array_t = std::array; + +constexpr auto ABSOLUTE = "ABS"; +constexpr auto PERCENTAGE = "%"; + +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.cpp new file mode 100644 index 0000000000..160d33ba77 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.cpp @@ -0,0 +1,296 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "metadata_registry.hpp" +#include +#include + +namespace rocprofsys +{ +namespace trace_cache +{ + +namespace +{ + +template +std::optional +get_type_info(const DataType& data, const Filter& filter) +{ + std::optional result = std::nullopt; + data.rlock([&filter, &result](const auto& _data) { + auto it = std::find_if(_data.begin(), _data.end(), filter); + result = it == _data.end() ? std::nullopt : std::optional(*it); + }); + return result; +} + +template +auto +assign_set_to_vector(T& result) +{ + return [&result](const auto& _data) { result.assign(_data.cbegin(), _data.cend()); }; +} +} // namespace + +void +metadata_registry::set_process(const info::process& process) +{ + m_process.wlock([&process](auto& _process) { _process = process; }); +} + +void +metadata_registry::add_pmc_info(const info::pmc& pmc_info) +{ + m_pmc_infos.wlock([&pmc_info](auto& _data) { + if(_data.count(pmc_info) > 0) + { + return; + } + _data.emplace(pmc_info); + }); +} + +void +metadata_registry::add_thread_info(const info::thread& thread_info) +{ + m_threads.wlock([&thread_info](auto& _data) { + if(_data.count(thread_info) > 0) + { + return; + } + _data.emplace(thread_info); + }); +} + +void +metadata_registry::add_track(const info::track& track_info) +{ + m_tracks.wlock([&track_info](auto& _data) { + if(_data.count(track_info) > 0) + { + return; + } + _data.emplace(track_info); + }); +} + +void +metadata_registry::add_queue(const uint64_t& queue_handle) +{ + m_queues.wlock([&queue_handle](auto& _data) { + if(_data.count(queue_handle) > 0) + { + return; + } + _data.emplace(queue_handle); + }); +} + +void +metadata_registry::add_stream(const uint64_t& stream_handle) +{ + m_streams.wlock([&stream_handle](auto& _data) { + if(_data.count(stream_handle) > 0) + { + return; + } + _data.emplace(stream_handle); + }); +} + +void +metadata_registry::add_string(const std::string_view& string_value) +{ + m_strings.wlock([&string_value](auto& _data) { + if(_data.count(string_value) > 0) + { + return; + } + _data.emplace(string_value); + }); +} + +info::process +metadata_registry::get_process_info() const +{ + info::process result; + m_process.rlock([&result](const auto& _process) { result = _process; }); + return result; +} + +std::optional +metadata_registry::get_pmc_info(const std::string_view& unique_name) const +{ + return get_type_info(m_pmc_infos, [&unique_name](const info::pmc& val) { + return val.name == unique_name; + }); +} + +std::optional +metadata_registry::get_thread_info(const uint32_t& thread_id) const +{ + return get_type_info(m_threads, [&thread_id](const info::thread& val) { + return val.thread_id == thread_id; + }); +} + +std::optional +metadata_registry::get_track_info(const std::string_view& track_name) const +{ + return get_type_info(m_tracks, [&track_name](const info::track& val) { + return val.track_name == track_name; + }); +} + +std::vector +metadata_registry::get_pmc_info_list() const +{ + std::vector result; + m_pmc_infos.rlock(assign_set_to_vector(result)); + return result; +} + +std::vector +metadata_registry::get_thread_info_list() const +{ + std::vector result; + m_threads.rlock(assign_set_to_vector(result)); + return result; +} + +std::vector +metadata_registry::get_track_info_list() const +{ + std::vector result; + m_tracks.rlock(assign_set_to_vector(result)); + return result; +} + +std::vector +metadata_registry::get_queue_list() const +{ + std::vector result; + m_queues.rlock(assign_set_to_vector(result)); + return result; +} + +std::vector +metadata_registry::get_stream_list() const +{ + std::vector result; + m_streams.rlock(assign_set_to_vector(result)); + return result; +} + +std::vector +metadata_registry::get_string_list() const +{ + std::vector result; + m_strings.rlock(assign_set_to_vector(result)); + return result; +} + +#if ROCPROFSYS_USE_ROCM + +void +metadata_registry::add_code_object( + const rocprofiler_callback_tracing_code_object_load_data_t& code_object) +{ + m_code_objects.wlock([&code_object](auto& _data) { + if(_data.count(code_object) > 0) + { + return; + } + _data.emplace(code_object); + }); +} + +void +metadata_registry::add_kernel_symbol( + const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t& + kernel_symbol) +{ + m_kernel_symbols.wlock([&kernel_symbol](auto& _data) { + if(_data.count(kernel_symbol) > 0) + { + return; + } + _data.emplace(kernel_symbol); + }); +} + +std::optional +metadata_registry::get_code_object(uint64_t code_object_id) const +{ + return get_type_info( + m_code_objects, + [&code_object_id]( + const rocprofiler_callback_tracing_code_object_load_data_t& val) { + return val.code_object_id == code_object_id; + }); +} + +std::optional +metadata_registry::get_kernel_symbol(uint64_t kernel_id) const +{ + return get_type_info< + rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t>( + m_kernel_symbols, + [&kernel_id]( + const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t& + val) { return val.kernel_id == kernel_id; }); +} + +std::vector +metadata_registry::get_code_object_list() const +{ + std::vector result; + m_code_objects.rlock(assign_set_to_vector(result)); + return result; +} + +std::vector +metadata_registry::get_kernel_symbol_list() const +{ + std::vector + result; + m_kernel_symbols.rlock(assign_set_to_vector(result)); + return result; +} + +rocprofiler::sdk::buffer_name_info_t +metadata_registry::get_buffer_name_info() const +{ + return m_buffered_tracing_info; +} + +rocprofiler::sdk::callback_name_info_t +metadata_registry::get_callback_tracing_info() const +{ + return m_callback_tracing_info; +} + +#endif + +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.hpp new file mode 100644 index 0000000000..2c8a0471bb --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.hpp @@ -0,0 +1,218 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "common/synchronized.hpp" +#include "core/agent.hpp" + +#include +#include +#include +#include +#include +#if ROCPROFSYS_USE_ROCM > 0 +# include +# include +#endif +#include +#include +#include +#include +#include +#include + +namespace rocprofsys +{ +namespace trace_cache +{ +namespace info +{ +struct process +{ + pid_t pid; // < Unique + pid_t ppid; + std::string command; +}; +struct pmc +{ + agent_type type; + size_t agent_type_index; + std::string target_arch; + size_t event_code; + size_t instance_id; + std::string name; // < Unique + std::string symbol; + std::string description; + std::string long_description; + std::string component; + std::string units; + std::string value_type; + std::string block; + std::string expression; + uint32_t is_constant; + uint32_t is_derived; + std::string extdata; +}; + +struct pmc_info_hash +{ + std::size_t operator()(const pmc& _pmc) const noexcept + { + std::size_t h1 = std::hash{}(static_cast(_pmc.type)); + std::size_t h2 = std::hash{}(_pmc.agent_type_index); + std::size_t h3 = std::hash{}(_pmc.name); + return h1 ^ (h2 << 1) ^ (h3 << 1); + } +}; + +struct pmc_info_equal +{ + bool operator()(const pmc& lhs, const pmc& rhs) const noexcept + { + return lhs.type == rhs.type && lhs.agent_type_index == rhs.agent_type_index && + lhs.name == rhs.name; + } +}; + +struct thread +{ + int32_t parent_process_id; + int32_t process_id; + uint64_t thread_id; // < Unique + uint32_t start; + uint32_t end; + std::string extdata; + friend bool operator<(const thread& lhs, const thread& rhs) + { + return lhs.thread_id < rhs.thread_id; + } +}; + +struct track +{ + std::string track_name; // < Unique + std::optional thread_id; + std::string extdata; + + friend bool operator<(const track& lhs, const track& rhs) + { + return lhs.track_name.compare(rhs.track_name) < 0; + } +}; + +#if ROCPROFSYS_USE_ROCM > 0 +struct code_object_less +{ + bool operator()(const rocprofiler_callback_tracing_code_object_load_data_t& lhs, + const rocprofiler_callback_tracing_code_object_load_data_t& rhs) const + { + return lhs.code_object_id < rhs.code_object_id; + } +}; + +struct kernel_symbol_less +{ + bool operator()( + const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t& lhs, + const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t& rhs) + const + { + return lhs.kernel_object < rhs.kernel_object; + } +}; +#endif + +} // namespace info + +class cache_manager; +struct metadata_registry +{ + void set_process(const info::process& process); + void add_pmc_info(const info::pmc& pmc_info); + void add_thread_info(const info::thread& thread_info); + void add_track(const info::track& track_info); + void add_queue(const uint64_t& queue_handle); + void add_stream(const uint64_t& stream_handle); + void add_string(const std::string_view& string_value); + + info::process get_process_info() const; + std::optional get_pmc_info(const std::string_view& unique_name) const; + std::optional get_thread_info(const uint32_t& thread_id) const; + std::optional get_track_info(const std::string_view& track_name) const; + std::vector get_pmc_info_list() const; + std::vector get_thread_info_list() const; + std::vector get_track_info_list() const; + std::vector get_queue_list() const; + std::vector get_stream_list() const; + std::vector get_string_list() const; + +#if ROCPROFSYS_USE_ROCM > 0 + void add_code_object( + const rocprofiler_callback_tracing_code_object_load_data_t& code_object); + void add_kernel_symbol( + const rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t& + kernel_symbol); + std::vector + get_code_object_list() const; + std::vector + get_kernel_symbol_list() const; + std::optional get_code_object( + uint64_t code_object_id) const; + std::optional + get_kernel_symbol(uint64_t kernel_id) const; + rocprofiler::sdk::buffer_name_info_t get_buffer_name_info() const; + rocprofiler::sdk::callback_name_info_t get_callback_tracing_info() const; +#endif + +private: + friend class cache_manager; + metadata_registry() = default; + common::synchronized m_process; + common::synchronized< + std::unordered_set> + m_pmc_infos; + common::synchronized> m_threads; + common::synchronized> m_tracks; + + common::synchronized> m_streams; + common::synchronized> m_queues; + common::synchronized> m_strings; +#if ROCPROFSYS_USE_ROCM > 0 + common::synchronized> + m_code_objects; + common::synchronized< + std::set> + m_kernel_symbols; + rocprofiler::sdk::buffer_name_info_t m_buffered_tracing_info{ + rocprofiler::sdk::get_buffer_tracing_names() + }; + rocprofiler::sdk::callback_name_info_t m_callback_tracing_info{ + rocprofiler::sdk::get_callback_tracing_names() + }; +#endif +}; + +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.cpp new file mode 100644 index 0000000000..f3777e8690 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.cpp @@ -0,0 +1,574 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "trace_cache/rocpd_post_processing.hpp" +#include "agent_manager.hpp" +#include "common.hpp" +#include "config.hpp" +#include "debug.hpp" +#include "library/thread_info.hpp" +#include "node_info.hpp" +#include "rocpd/data_processor.hpp" +#include "trace_cache/metadata_registry.hpp" +#include "trace_cache/sample_type.hpp" +#include "trace_cache/storage_parser.hpp" +#include +#include +#include +#include +#include +#if ROCPROFSYS_USE_ROCM > 0 +# include "library/rocprofiler-sdk/fwd.hpp" +# include +# include +#endif + +namespace rocprofsys +{ +namespace trace_cache +{ +namespace +{ +rocpd::data_processor& +get_data_processor() +{ + return rocpd::data_processor::get_instance(); +} + +#if ROCPROFSYS_USE_ROCM > 0 +auto +get_handle_from_code_object( + const rocprofiler_callback_tracing_code_object_load_data_t& code_object) +{ +# if(ROCPROFILER_VERSION >= 600) + return code_object.agent_id.handle; +# else + return code_object.rocp_agent.handle; +# endif +} +#endif +} // namespace + +postprocessing_callback +rocpd_post_processing::get_kernel_dispatch_callback() const +{ + return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) { +#if ROCPROFSYS_USE_ROCM > 0 + auto _kds = static_cast(parsed); + + auto& data_processor = get_data_processor(); + auto& agent_manager = agent_manager::get_instance(); + auto& n_info = node_info::get_instance(); + auto process = m_metadata.get_process_info(); + auto agent_primary_key = + agent_manager.get_agent_by_handle(_kds.agent_id_handle).base_id; + + auto thread_primary_key = + data_processor.map_thread_id_to_primary_key(_kds.thread_id); + + auto category_id = data_processor.insert_string( + trait::name::value); + + auto kernel_symbol = m_metadata.get_kernel_symbol(_kds.kernel_id); + + if(!kernel_symbol.has_value()) + { + throw std::runtime_error("Kernel symbol is missing for kernel dispatch"); + return; + } + + auto region_name_primary_key = data_processor.insert_string( + tim::demangle(kernel_symbol->kernel_name).c_str()); + + auto stack_id = _kds.correlation_id_internal; + auto parent_stack_id = _kds.correlation_id_ancestor; + auto correlation_id = 0; + + auto event_id = data_processor.insert_event(category_id, stack_id, + parent_stack_id, correlation_id); + + data_processor.insert_kernel_dispatch( + n_info.id, process.pid, thread_primary_key, agent_primary_key, _kds.kernel_id, + _kds.dispatch_id, _kds.queue_id_handle, _kds.stream_handle, + _kds.start_timestamp, _kds.end_timestamp, _kds.private_segment_size, + _kds.group_segment_size, _kds.workgroup_size_x, _kds.workgroup_size_y, + _kds.workgroup_size_z, _kds.grid_size_x, _kds.grid_size_y, _kds.grid_size_z, + region_name_primary_key, event_id); +#endif + }; +} + +postprocessing_callback +rocpd_post_processing::get_memory_copy_callback() const +{ + return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) { +#if ROCPROFSYS_USE_ROCM > 0 + auto _mcs = static_cast(parsed); + + auto& data_processor = get_data_processor(); + auto& agent_manager = agent_manager::get_instance(); + auto& n_info = node_info::get_instance(); + auto process = m_metadata.get_process_info(); + + auto _name = std::string{ m_metadata.get_buffer_name_info().at( + static_cast(_mcs.kind), + static_cast(_mcs.operation)) }; + auto name_primary_key = data_processor.insert_string(_name.c_str()); + + auto category_primary_key = + data_processor.insert_string(trait::name::value); + + auto thread_primary_key = + data_processor.map_thread_id_to_primary_key(_mcs.thread_id); + + auto dst_agent_primary_key = + agent_manager.get_agent_by_handle(_mcs.dst_agent_id_handle).base_id; + auto src_agent_primary_key = + agent_manager.get_agent_by_handle(_mcs.src_agent_id_handle).base_id; + + auto stack_id = _mcs.correlation_id_internal; + auto parent_stack_id = _mcs.correlation_id_ancestor; + auto correlation_id = 0; + auto queue_id = 0; + + auto event_primary_key = data_processor.insert_event( + category_primary_key, stack_id, parent_stack_id, correlation_id); + + data_processor.insert_memory_copy( + n_info.id, process.pid, thread_primary_key, _mcs.start_timestamp, + _mcs.end_timestamp, name_primary_key, dst_agent_primary_key, + _mcs.dst_address_value, src_agent_primary_key, _mcs.src_address_value, + _mcs.bytes, queue_id, _mcs.stream_handle, name_primary_key, + event_primary_key); +#endif + }; +} + +#if(ROCPROFSYS_USE_ROCM > 0 && ROCPROFILER_VERSION >= 600) +postprocessing_callback +rocpd_post_processing::get_memory_allocate_callback() const +{ +# if ROCPROFSYS_USE_ROCM > 0 + auto memtype_to_db = + [](std::string_view memory_type) -> std::pair { + constexpr auto MEMORY_PREFIX = std::string_view{ "MEMORY_ALLOCATION_" }; + constexpr auto SCRATCH_PREFIX = std::string_view{ "SCRATCH_MEMORY_" }; + constexpr auto VMEM_PREFIX = std::string_view{ "VMEM_" }; + constexpr auto ASYNC_PREFIX = std::string_view{ "ASYNC_" }; + + std::string _type; + std::string _level; + if(memory_type.find(MEMORY_PREFIX) == 0) + { + _type = memory_type.substr(MEMORY_PREFIX.length()); + if(_type.find(VMEM_PREFIX) == 0) + { + _type = _type.substr(VMEM_PREFIX.length()); + _level = "VIRTUAL"; + } + else + { + _level = "REAL"; + } + } + else if(memory_type.find(SCRATCH_PREFIX) == 0) + { + _type = memory_type.substr(SCRATCH_PREFIX.length()); + _level = "SCRATCH"; + if(memory_type.find(ASYNC_PREFIX) == 0) + { + _type = memory_type.substr(ASYNC_PREFIX.length()); // RECLAIM + } + } + + if(_type == "ALLOCATE") + { + _type = "ALLOC"; + } + + return std::make_pair(_type, _level); + }; +# endif + + return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) { +# if ROCPROFSYS_USE_ROCM > 0 + auto _mas = static_cast(parsed); + auto& data_processor = get_data_processor(); + auto& agent_manager = agent_manager::get_instance(); + auto& n_info = node_info::get_instance(); + auto process = m_metadata.get_process_info(); + auto thread_primary_key = + data_processor.map_thread_id_to_primary_key(_mas.thread_id); + auto agent_primary_key = std::optional{}; + + const auto invalid_context = ROCPROFILER_CONTEXT_NONE; + if(_mas.agent_id_handle != invalid_context.handle) + { + { + agent_primary_key = + agent_manager.get_agent_by_handle(_mas.agent_id_handle).base_id; + } + const auto* _name = m_metadata.get_buffer_name_info().at( + static_cast(_mas.kind), + static_cast(_mas.operation)); + + auto [type, level] = memtype_to_db(_name); + + auto stack_id = _mas.correlation_id_internal; + auto parent_stack_id = _mas.correlation_id_ancestor; + auto correlation_id = 0; + auto queue_id = 0; + + auto category_primary_key = data_processor.insert_string( + trait::name::value); + + auto event_primary_key = data_processor.insert_event( + category_primary_key, stack_id, parent_stack_id, correlation_id); + + data_processor.insert_memory_alloc( + n_info.id, process.pid, thread_primary_key, agent_primary_key, + type.c_str(), level.c_str(), _mas.start_timestamp, _mas.end_timestamp, + _mas.address_value, _mas.allocation_size, queue_id, _mas.stream_handle, + event_primary_key); +# endif + }; + }; +} +#endif + +postprocessing_callback +rocpd_post_processing::get_region_callback() const +{ + [[maybe_unused]] auto parse_args = []([[maybe_unused]] const std::string& arg_str) { +#if ROCPROFSYS_USE_ROCM > 0 + rocprofiler_sdk::function_args_t args; + const std::string delimiter = ";;"; + + auto split = [](const std::string& str, const std::string& _delimiter) { + std::vector tokens; + size_t start = 0; + size_t end = str.find(_delimiter); + + while(end != std::string::npos) + { + tokens.push_back(str.substr(start, end - start)); + start = end + _delimiter.length(); + end = str.find(_delimiter, start); + } + + return tokens; + }; + + auto tokens = split(arg_str, delimiter); + + // Ensure the number of tokens is a multiple of 4 + if(tokens.size() % 4 != 0) + { + throw std::invalid_argument("Malformed argument string."); + } + + for(auto it = tokens.begin(); it != tokens.end(); it += 4) + { + rocprofiler_sdk::argument_info arg = { static_cast(std::stoi(*it)), + *(it + 1), *(it + 2), *(it + 3) }; + args.push_back(arg); + } + + return args; +#endif + }; + + return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) { +#if ROCPROFSYS_USE_ROCM > 0 + auto _rs = static_cast(parsed); + auto& data_processor = get_data_processor(); + auto& n_info = node_info::get_instance(); + auto process = m_metadata.get_process_info(); + auto thread_primary_key = + data_processor.map_thread_id_to_primary_key(_rs.thread_id); + + auto callback_tracing_info = m_metadata.get_callback_tracing_info(); + auto _name = std::string{ callback_tracing_info.at( + static_cast(_rs.kind), + static_cast(_rs.operation)) }; + auto name_primary_key = data_processor.insert_string(_name.c_str()); + + auto category_primary_key = data_processor.insert_string(_rs.category.c_str()); + + size_t stack_id = _rs.correlation_id_internal; + size_t parent_stack_id = _rs.correlation_id_ancestor; + size_t correlation_id = 0; + + auto event_primary_key = + data_processor.insert_event(category_primary_key, stack_id, parent_stack_id, + correlation_id, _rs.call_stack.c_str()); + + auto args = parse_args(_rs.args_str); + for(const auto& arg : args) + { + data_processor.insert_args(event_primary_key, arg.arg_number, + arg.arg_type.c_str(), arg.arg_name.c_str(), + arg.arg_value.c_str()); + } + + data_processor.insert_region(n_info.id, process.pid, thread_primary_key, + _rs.start_timestamp, _rs.end_timestamp, + name_primary_key, event_primary_key); +#endif + }; +} + +postprocessing_callback +rocpd_post_processing::get_in_time_sample_callback() const +{ + return [&](const storage_parsed_type_base& parsed) { + auto _its = static_cast(parsed); + auto& data_processor = get_data_processor(); + auto track_primary_key = data_processor.insert_string(_its.track_name.c_str()); + + auto event_id = data_processor.insert_event( + track_primary_key, _its.stack_id, _its.parent_stack_id, _its.correlation_id, + _its.call_stack.c_str(), _its.line_info.c_str(), _its.event_metadata.c_str()); + data_processor.insert_sample(_its.track_name.c_str(), _its.timestamp_ns, event_id, + "{}"); + }; +} +postprocessing_callback +rocpd_post_processing::get_pmc_event_with_sample_callback() const +{ + return [&](const storage_parsed_type_base& parsed) { + auto _pmc = static_cast(parsed); + auto& data_processor = get_data_processor(); + auto track_primary_key = data_processor.insert_string(_pmc.track_name.c_str()); + + auto& agent_manager = agent_manager::get_instance(); + auto agent_primary_key = + agent_manager.get_agent_by_handle(_pmc.agent_handle).base_id; + + auto event_id = data_processor.insert_event( + track_primary_key, _pmc.stack_id, _pmc.parent_stack_id, _pmc.correlation_id, + _pmc.call_stack.c_str(), _pmc.line_info.c_str(), _pmc.event_metadata.c_str()); + data_processor.insert_sample(_pmc.track_name.c_str(), _pmc.timestamp_ns, event_id, + "{}"); + + data_processor.insert_pmc_event(event_id, agent_primary_key, + _pmc.pmc_info_name.c_str(), _pmc.value); + }; +} + +rocpd_post_processing::rocpd_post_processing(metadata_registry& md) +: m_metadata(md) +{} + +void +rocpd_post_processing::register_parser_callback([[maybe_unused]] storage_parser& parser) +{ +#if ROCPROFSYS_USE_ROCM > 0 + if(!get_use_rocpd()) + { + return; + } + parser.register_type_callback(entry_type::region, get_region_callback()); + parser.register_type_callback(entry_type::kernel_dispatch, + get_kernel_dispatch_callback()); + parser.register_type_callback(entry_type::memory_copy, get_memory_copy_callback()); +# if(ROCPROFILER_VERSION >= 600) + parser.register_type_callback(entry_type::memory_alloc, + get_memory_allocate_callback()); +# endif + parser.register_type_callback(entry_type::in_time_sample, + get_in_time_sample_callback()); + parser.register_type_callback(entry_type::pmc_event_with_sample, + get_pmc_event_with_sample_callback()); + ROCPROFSYS_DEBUG("Buffer parser callbacks are registered.."); +#endif +} + +void +rocpd_post_processing::post_process_metadata() +{ +#if ROCPROFSYS_USE_ROCM > 0 + if(!get_use_rocpd()) + { + return; + } + ROCPROFSYS_DEBUG("Post processing metadata.."); + auto& data_processor = get_data_processor(); + auto& agent_mngr = agent_manager::get_instance(); + auto n_info = node_info::get_instance(); + + data_processor.insert_node_info(n_info.id, n_info.hash, n_info.machine_id.c_str(), + n_info.system_name.c_str(), n_info.node_name.c_str(), + n_info.release.c_str(), n_info.version.c_str(), + n_info.machine.c_str(), n_info.domain_name.c_str()); + + auto process_info = m_metadata.get_process_info(); + data_processor.insert_process_info(n_info.id, process_info.ppid, process_info.pid, 0, + 0, 0, 0, process_info.command.c_str(), "{}"); + + const auto& agents = agent_mngr.get_agents(); + int counter = 0; + for(const auto& rocpd_agent : agents) + { + auto _base_id = rocpd::data_processor::get_instance().insert_agent( + n_info.id, process_info.pid, + ((rocpd_agent->type == agent_type::GPU) ? "GPU" : "CPU"), counter++, + rocpd_agent->logical_node_id, rocpd_agent->logical_node_type_id, + rocpd_agent->device_id, rocpd_agent->name.c_str(), + rocpd_agent->model_name.c_str(), rocpd_agent->vendor_name.c_str(), + rocpd_agent->product_name.c_str(), ""); + rocpd_agent->base_id = _base_id; + } + auto _string_list = m_metadata.get_string_list(); + for(auto& _string : _string_list) + { + data_processor.insert_string(std::string(_string).c_str()); + } + + auto _thread_info_list = m_metadata.get_thread_info_list(); + for(auto& t_info : _thread_info_list) + { + rocpd_insert_thread_id(t_info, n_info, process_info); + } + + auto _track_info_list = m_metadata.get_track_info_list(); + for(auto& track : _track_info_list) + { + auto thread_id = + track.thread_id.has_value() + ? std::make_optional(data_processor.map_thread_id_to_primary_key( + track.thread_id.value())) + : std::nullopt; + data_processor.insert_track(track.track_name.c_str(), n_info.id, process_info.pid, + thread_id); + } + + auto _code_object_list = m_metadata.get_code_object_list(); + for(const auto& code_object : _code_object_list) + { + auto dev_id = + agent_mngr.get_agent_by_handle(get_handle_from_code_object(code_object)) + .base_id; + + const char* strg_type = "UNKNOWN"; + switch(code_object.storage_type) + { + case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE: strg_type = "FILE"; break; + case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY: strg_type = "MEMORY"; break; + default: break; + } + data_processor.insert_code_object(code_object.code_object_id, n_info.id, + process_info.pid, dev_id, code_object.uri, + code_object.load_base, code_object.load_size, + code_object.load_delta, strg_type); + } + + auto _kernel_symbols_list = m_metadata.get_kernel_symbol_list(); + for(const auto& kernel_symbol : _kernel_symbols_list) + { + auto kernel_name = tim::demangle(kernel_symbol.kernel_name); + data_processor.insert_kernel_symbol( + kernel_symbol.kernel_id, n_info.id, process_info.pid, + kernel_symbol.code_object_id, kernel_symbol.kernel_name, kernel_name.c_str(), + kernel_symbol.kernel_object, kernel_symbol.kernarg_segment_size, + kernel_symbol.kernarg_segment_alignment, kernel_symbol.group_segment_size, + kernel_symbol.private_segment_size, kernel_symbol.sgpr_count, + kernel_symbol.arch_vgpr_count, kernel_symbol.accum_vgpr_count); + + data_processor.insert_string(kernel_name.c_str()); + } + + auto _queue_list = m_metadata.get_queue_list(); + for(const auto& queue_handle : _queue_list) + { + std::stringstream ss; + ss << "Queue " << queue_handle; + data_processor.insert_queue_info(queue_handle, n_info.id, process_info.pid, + ss.str().c_str()); + } + + auto _stream_list = m_metadata.get_stream_list(); + for(const auto& stream_handle : _stream_list) + { + std::stringstream ss; + ss << "Stream " << stream_handle; + data_processor.insert_stream_info(stream_handle, n_info.id, process_info.pid, + ss.str().c_str()); + } + + auto buffer_info_list = m_metadata.get_buffer_name_info(); + for(const auto& buffer_info : buffer_info_list) + { + for(const auto& item : buffer_info.items()) + { + data_processor.insert_string(*item.second); + } + } + + auto callback_info_list = m_metadata.get_callback_tracing_info(); + for(const auto& cb_info : callback_info_list) + { + for(const auto& item : cb_info.items()) + { + data_processor.insert_string(*item.second); + } + } + + auto pmc_info_list = m_metadata.get_pmc_info_list(); + for(const auto& pmc_info : pmc_info_list) + { + const auto agent_primary_key = + agent_mngr.get_agent_by_type_index(pmc_info.agent_type_index, pmc_info.type) + .base_id; + + data_processor.insert_pmc_description( + n_info.id, process_info.pid, agent_primary_key, pmc_info.target_arch.c_str(), + pmc_info.event_code, pmc_info.instance_id, pmc_info.name.c_str(), + pmc_info.symbol.c_str(), pmc_info.description.c_str(), + pmc_info.long_description.c_str(), pmc_info.component.c_str(), + pmc_info.units.c_str(), pmc_info.value_type.c_str(), pmc_info.block.c_str(), + pmc_info.expression.c_str(), pmc_info.is_constant, pmc_info.is_derived); + } +#endif +} + +inline void +rocpd_post_processing::rocpd_insert_thread_id(info::thread& t_info, + const node_info& n_info, + const info::process& process_info) const +{ + const auto& extended_info = thread_info::get(t_info.thread_id, SequentTID); + if(extended_info.has_value()) + { + t_info.start = extended_info->get_start(); + t_info.end = extended_info->get_stop(); + } + + std::stringstream ss; + ss << "Thread " << t_info.thread_id; + get_data_processor().insert_thread_info(n_info.id, process_info.ppid, + process_info.pid, t_info.thread_id, + ss.str().c_str(), t_info.start, t_info.end); +} + +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.hpp new file mode 100644 index 0000000000..b2d9a228e8 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.hpp @@ -0,0 +1,60 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once +#include "core/node_info.hpp" +#include "core/trace_cache/metadata_registry.hpp" +#include "core/trace_cache/storage_parser.hpp" + +namespace rocprofsys +{ +namespace trace_cache +{ + +class rocpd_post_processing +{ +public: + rocpd_post_processing(metadata_registry& metadata); + + void register_parser_callback(storage_parser& parser); + void post_process_metadata(); + +private: + using primary_key = size_t; + + inline void rocpd_insert_thread_id(info::thread& t_info, const node_info& n_info, + const info::process& process_info) const; + + postprocessing_callback get_kernel_dispatch_callback() const; + postprocessing_callback get_memory_copy_callback() const; +#if(ROCPROFILER_VERSION >= 600) + postprocessing_callback get_memory_allocate_callback() const; +#endif + postprocessing_callback get_region_callback() const; + postprocessing_callback get_in_time_sample_callback() const; + postprocessing_callback get_pmc_event_with_sample_callback() const; + + metadata_registry& m_metadata; +}; + +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/sample_type.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/sample_type.hpp new file mode 100644 index 0000000000..bf7941e788 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/sample_type.hpp @@ -0,0 +1,198 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once +#include +#include +#include +#include + +#if ROCPROFSYS_USE_ROCM > 0 +# include +#endif + +namespace rocprofsys +{ +namespace trace_cache +{ + +struct storage_parsed_type_base +{}; + +struct kernel_dispatch_sample : storage_parsed_type_base +{ + // Timing fields + uint64_t start_timestamp; + uint64_t end_timestamp; + + // Identification fields + uint64_t thread_id; + uint64_t agent_id_handle; + uint64_t kernel_id; + uint64_t dispatch_id; + uint64_t queue_id_handle; + + // Correlation fields + uint64_t correlation_id_internal; + uint64_t correlation_id_ancestor; + + // Dispatch configuration + uint32_t private_segment_size; + uint32_t group_segment_size; + uint32_t workgroup_size_x; + uint32_t workgroup_size_y; + uint32_t workgroup_size_z; + uint32_t grid_size_x; + uint32_t grid_size_y; + uint32_t grid_size_z; + + // Stream handle + size_t stream_handle; +}; + +struct memory_copy_sample : storage_parsed_type_base +{ + // Timing fields + uint64_t start_timestamp; + uint64_t end_timestamp; + + // Identification fields + uint64_t thread_id; + uint64_t dst_agent_id_handle; + uint64_t src_agent_id_handle; + + // Operation details + int32_t kind; + int32_t operation; + uint64_t bytes; + + // Correlation fields + uint64_t correlation_id_internal; + uint64_t correlation_id_ancestor; + + // Address fields (version dependent) + uint64_t dst_address_value; + uint64_t src_address_value; + + // Stream handle + size_t stream_handle; +}; + +#if(ROCPROFILER_VERSION >= 600) +struct memory_allocate_sample : storage_parsed_type_base +{ + // Timing fields + uint64_t start_timestamp; + uint64_t end_timestamp; + + // Identification fields + uint64_t thread_id; + uint64_t agent_id_handle; + + // Operation details + int32_t kind; + int32_t operation; + uint64_t allocation_size; + + // Correlation fields + uint64_t correlation_id_internal; + uint64_t correlation_id_ancestor; + + // Address fields (version dependent) + uint64_t address_value; + + // Stream handle + size_t stream_handle; +}; +#endif + +struct region_sample : storage_parsed_type_base +{ + region_sample() = default; + region_sample(uint64_t _thread_id, int32_t _kind, int32_t _operation, + uint64_t _correlation_id_internal, uint64_t _correlation_id_ancestor, + uint64_t _start_timestamp, uint64_t _end_timestamp, + std::string _call_stack, std::string _args_str, std::string _category) + : thread_id(_thread_id) + , kind(_kind) + , operation(_operation) + , correlation_id_internal(_correlation_id_internal) + , correlation_id_ancestor(_correlation_id_ancestor) + , start_timestamp(_start_timestamp) + , end_timestamp(_end_timestamp) + , call_stack(std::move(_call_stack)) + , args_str(std::move(_args_str)) + , category(std::move(_category)) + {} + + // Identification fields + uint64_t thread_id; + int32_t kind; + int32_t operation; + + // Correlation fields + uint64_t correlation_id_internal; + uint64_t correlation_id_ancestor; + + // Timing fields + uint64_t start_timestamp; + uint64_t end_timestamp; + + // Additional fields + std::string call_stack; + std::string args_str; + std::string category; +}; + +struct in_time_sample : storage_parsed_type_base +{ + std::string track_name; + size_t timestamp_ns; + std::string event_metadata; + size_t stack_id; + size_t parent_stack_id; + size_t correlation_id; + std::string call_stack; + std::string line_info; +}; + +struct pmc_event_with_sample : in_time_sample +{ + size_t agent_handle; + std::string pmc_info_name; + size_t value; +}; + +enum class entry_type : uint32_t +{ + in_time_sample = 0x0000, + pmc_event_with_sample = 0x0001, + region = 0x0002, + kernel_dispatch = 0x0003, + memory_copy = 0x0004, +#if(ROCPROFSYS_USE_ROCM && ROCPROFILER_VERSION >= 600) + memory_alloc = 0x0005, +#endif + fragmented_space = 0xFFFF +}; +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.cpp new file mode 100644 index 0000000000..1f0d23f293 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.cpp @@ -0,0 +1,230 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "storage_parser.hpp" +#include "debug.hpp" +#include "trace_cache/sample_type.hpp" +#include +#include +#include +#include + +namespace rocprofsys +{ +namespace trace_cache +{ + +storage_parser::storage_parser(pid_t _pid) +: m_pid(_pid) +{} + +void +storage_parser::register_type_callback( + const entry_type& type, + const std::function& callback) +{ + m_callbacks[type].push_back(callback); +} + +void +storage_parser::consume_storage() +{ + ROCPROFSYS_DEBUG("Consuming buffered storage with filename: %s", filename.c_str()); + if(m_pid != getpid()) + { + ROCPROFSYS_DEBUG( + "Storage parser is not created in same process as shutting down.."); + return; + } + + std::ifstream ifs(filename, std::ios::binary); + if(!ifs) + { + std::stringstream ss; + ss << "Error opening file for reading: " << filename << "\n"; + throw std::runtime_error(ss.str()); + } + + bool _parsing_needed = !m_callbacks.empty(); + + struct __attribute__((packed)) sample_header + { + entry_type type; + size_t sample_size; + }; + + sample_header header; + + while(!ifs.eof() && _parsing_needed) + { + ifs.read(reinterpret_cast(&header), sizeof(header)); + + if(header.sample_size == 0 || ifs.eof()) + { + continue; + } + + std::vector sample; + sample.reserve(header.sample_size); + ifs.read(reinterpret_cast(sample.data()), header.sample_size); + + if(ifs.bad()) + { + ROCPROFSYS_WARNING( + 1, + "Bad read while consuming buffered storage. Filename: %s. Bytes read: %d", + filename.c_str(), static_cast(ifs.tellg())); + continue; + } + + switch(header.type) + { + case entry_type::kernel_dispatch: + { + kernel_dispatch_sample _kernel_dispatch_sample; + parse_data(sample.data(), _kernel_dispatch_sample.start_timestamp, + _kernel_dispatch_sample.end_timestamp, + _kernel_dispatch_sample.thread_id, + _kernel_dispatch_sample.agent_id_handle, + _kernel_dispatch_sample.kernel_id, + _kernel_dispatch_sample.dispatch_id, + _kernel_dispatch_sample.queue_id_handle, + _kernel_dispatch_sample.correlation_id_internal, + _kernel_dispatch_sample.correlation_id_ancestor, + _kernel_dispatch_sample.private_segment_size, + _kernel_dispatch_sample.group_segment_size, + _kernel_dispatch_sample.workgroup_size_x, + _kernel_dispatch_sample.workgroup_size_y, + _kernel_dispatch_sample.workgroup_size_z, + _kernel_dispatch_sample.grid_size_x, + _kernel_dispatch_sample.grid_size_y, + _kernel_dispatch_sample.grid_size_z, + _kernel_dispatch_sample.stream_handle); + + invoke_callbacks(header.type, _kernel_dispatch_sample); + break; + } + case entry_type::memory_copy: + { + memory_copy_sample _memory_copy_sample; + parse_data( + sample.data(), _memory_copy_sample.start_timestamp, + _memory_copy_sample.end_timestamp, _memory_copy_sample.thread_id, + _memory_copy_sample.dst_agent_id_handle, + _memory_copy_sample.src_agent_id_handle, _memory_copy_sample.kind, + _memory_copy_sample.operation, _memory_copy_sample.bytes, + _memory_copy_sample.correlation_id_internal, + _memory_copy_sample.correlation_id_ancestor, + _memory_copy_sample.dst_address_value, + _memory_copy_sample.src_address_value, + _memory_copy_sample.stream_handle); + invoke_callbacks(header.type, _memory_copy_sample); + break; + } +#if(ROCPROFILER_VERSION >= 600) + case entry_type::memory_alloc: + { + memory_allocate_sample _memory_allocate_sample; + parse_data(sample.data(), _memory_allocate_sample.start_timestamp, + _memory_allocate_sample.end_timestamp, + _memory_allocate_sample.thread_id, + _memory_allocate_sample.agent_id_handle, + _memory_allocate_sample.kind, + _memory_allocate_sample.operation, + _memory_allocate_sample.allocation_size, + _memory_allocate_sample.correlation_id_internal, + _memory_allocate_sample.correlation_id_ancestor, + _memory_allocate_sample.address_value, + _memory_allocate_sample.stream_handle); + + invoke_callbacks(header.type, _memory_allocate_sample); + break; + } +#endif + case entry_type::region: + { + region_sample _region_sample; + parse_data(sample.data(), _region_sample.thread_id, _region_sample.kind, + _region_sample.operation, + _region_sample.correlation_id_internal, + _region_sample.correlation_id_ancestor, + _region_sample.start_timestamp, _region_sample.end_timestamp, + _region_sample.call_stack, _region_sample.args_str, + _region_sample.category); + + invoke_callbacks(header.type, _region_sample); + break; + } + case entry_type::in_time_sample: + { + in_time_sample _in_time_sample; + parse_data(sample.data(), _in_time_sample.track_name, + _in_time_sample.timestamp_ns, _in_time_sample.event_metadata, + _in_time_sample.stack_id, _in_time_sample.parent_stack_id, + _in_time_sample.correlation_id, _in_time_sample.call_stack, + _in_time_sample.line_info); + invoke_callbacks(header.type, _in_time_sample); + break; + } + case entry_type::pmc_event_with_sample: + { + pmc_event_with_sample _pmc_event_with_sample; + parse_data( + sample.data(), _pmc_event_with_sample.track_name, + _pmc_event_with_sample.timestamp_ns, + _pmc_event_with_sample.event_metadata, + _pmc_event_with_sample.stack_id, + _pmc_event_with_sample.parent_stack_id, + _pmc_event_with_sample.correlation_id, + _pmc_event_with_sample.call_stack, _pmc_event_with_sample.line_info, + _pmc_event_with_sample.agent_handle, + _pmc_event_with_sample.pmc_info_name, _pmc_event_with_sample.value); + invoke_callbacks(header.type, _pmc_event_with_sample); + break; + } + default: break; + } + } + + ifs.close(); + ROCPROFSYS_DEBUG("File parsing finished. Removing %s from file system", + filename.c_str()); + std::remove(filename.c_str()); +} + +void +storage_parser::invoke_callbacks(entry_type type, const storage_parsed_type_base& parsed) +{ + auto _callback_list = m_callbacks.find(type); + if(_callback_list == m_callbacks.end()) + { + ROCPROFSYS_VERBOSE(1, "Callback not found for cache postprocessing"); + return; + } + + for(auto& cb : _callback_list->second) + { + cb(parsed); + } +} +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.hpp new file mode 100644 index 0000000000..a1e342917c --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.hpp @@ -0,0 +1,83 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "buffer_storage.hpp" +#include "sample_type.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace rocprofsys +{ +namespace trace_cache +{ +using postprocessing_callback = std::function; +class cache_manager; +class storage_parser +{ +public: + void register_type_callback(const entry_type& type, + const postprocessing_callback& callback); + + void consume_storage(); + +private: + friend class cache_manager; + storage_parser(pid_t _pid); + template + static void process_arg(const uint8_t*& data_pos, T& arg) + { + if constexpr(std::is_same_v) + { + arg = std::string((const char*) data_pos); + data_pos += arg.size() + 1; + } + else + { + arg = *reinterpret_cast(data_pos); + data_pos += sizeof(T); + } + } + + template + static void parse_data(const uint8_t* data_pos, Args&... args) + { + (process_arg(data_pos, args), ...); + } + +private: + pid_t m_pid; + void invoke_callbacks(entry_type type, const storage_parsed_type_base& parsed); + std::map> m_callbacks; +}; + +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/dl.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/dl.cpp index d1f0a1f9bd..fb4d709616 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/dl.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys-dl/dl.cpp @@ -147,16 +147,6 @@ reset_rocprofsys_preload() { (void) get_rocprofsys_is_preloaded(); (void) get_rocprofsys_preload(); - auto _modified_preload = std::string{}; - for(const auto& itr : delimit(_preload_libs, ":")) - { - if(itr.find("librocprof-sys") != std::string::npos) continue; - _modified_preload += common::join("", ":", itr); - } - if(!_modified_preload.empty() && _modified_preload.find(':') == 0) - _modified_preload = _modified_preload.substr(1); - - setenv("LD_PRELOAD", _modified_preload.c_str(), 1); } } @@ -1251,9 +1241,9 @@ rocprofsys_preload() verify_instrumented_preloaded(); - static bool _once = false; - if(_once) return _preload; - _once = true; + static pid_t _once = 0; + if(_once == getpid()) return _preload; + _once = getpid(); if(_preload) { diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h b/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h index 27fb063ff3..f5e6b1e3dd 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys-user/rocprofiler-systems/categories.h @@ -48,6 +48,7 @@ extern "C" ROCPROFSYS_CATEGORY_ROCM_HSA_API, ROCPROFSYS_CATEGORY_ROCM_KERNEL_DISPATCH, ROCPROFSYS_CATEGORY_ROCM_MEMORY_COPY, + ROCPROFSYS_CATEGORY_ROCM_MEMORY_ALLOCATE, ROCPROFSYS_CATEGORY_ROCM_SCRATCH_MEMORY, ROCPROFSYS_CATEGORY_ROCM_HIP_STREAM, ROCPROFSYS_CATEGORY_ROCM_PAGE_MIGRATION, diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp index 34984cdb0a..aeff6cf976 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp @@ -44,6 +44,7 @@ #include "core/perfetto_fwd.hpp" #include "core/rocpd/data_processor.hpp" #include "core/timemory.hpp" +#include "core/trace_cache/cache_manager.hpp" #include "core/utility.hpp" #include "library/causal/data.hpp" #include "library/causal/experiment.hpp" @@ -328,37 +329,17 @@ read_command_line(pid_t _pid) } void -rocprofsys_preinit_rocpd() +rocprofsys_preinit_cache() { - auto& _data_processor = rocpd::data_processor::get_instance(); - const auto& _n_info = node_info::get_instance(); - auto _cmd_line = read_command_line(getpid()); - auto& _agent_manager = agent_manager::get_instance(); + auto _cmd_line = read_command_line(getpid()); if(_cmd_line.empty()) { _cmd_line.emplace_back("rocprofiler-systems"); } - _data_processor.insert_node_info( - _n_info.id, _n_info.hash, _n_info.machine_id.c_str(), _n_info.system_name.c_str(), - _n_info.node_name.c_str(), _n_info.release.c_str(), _n_info.version.c_str(), - _n_info.machine.c_str(), _n_info.domain_name.c_str()); - _data_processor.insert_process_info(_n_info.id, getppid(), getpid(), 0, 0, 0, 0, - _cmd_line[0].c_str(), "{}"); - - const auto& agents = _agent_manager.get_agents(); - for(const auto& rocpd_agent : agents) - { - auto _base_id = rocpd::data_processor::get_instance().insert_agent( - _n_info.id, getpid(), - ((rocpd_agent->type == agent_type::GPU) ? "GPU" : "CPU"), - rocpd_agent->node_id, rocpd_agent->logical_node_id, - rocpd_agent->logical_node_type_id, rocpd_agent->id, rocpd_agent->name.c_str(), - rocpd_agent->model_name.c_str(), rocpd_agent->vendor_name.c_str(), - rocpd_agent->product_name.c_str(), ""); - rocpd_agent->base_id = _base_id; - } + trace_cache::get_metadata_registry().set_process( + { getpid(), getppid(), _cmd_line.at(0) }); } void @@ -534,7 +515,7 @@ rocprofsys_init_tooling_hidden(void) #if !(ROCPROFSYS_USE_ROCM > 0) rocprofsys_preinit_cpu_agents(); #endif - if(get_use_rocpd()) rocprofsys_preinit_rocpd(); + rocprofsys_preinit_cache(); if(get_use_process_sampling()) { @@ -776,10 +757,16 @@ rocprofsys_finalize_hidden(void) rocprofiler_sdk::shutdown(); } #endif + auto& _manager = rocprofsys::trace_cache::cache_manager::get_instance(); + _manager.shutdown(); + _manager.post_process(); + +#if ROCPROFSYS_USE_ROCM > 0 if(get_use_rocpd()) { rocpd::data_processor::get_instance().flush(); } +#endif set_state(State::Finalized); std::quick_exit(EXIT_SUCCESS); return; @@ -880,6 +867,12 @@ rocprofsys_finalize_hidden(void) } #endif + { + auto& _manager = rocprofsys::trace_cache::cache_manager::get_instance(); + _manager.shutdown(); + _manager.post_process(); + } + ROCPROFSYS_DEBUG_F("Stopping and destroying instrumentation bundles...\n"); for(size_t i = 0; i < thread_info::get_peak_num_threads(); ++i) { @@ -1070,10 +1063,12 @@ rocprofsys_finalize_hidden(void) [](int) {}); common::destroy_static_objects(); +#if ROCPROFSYS_USE_ROCM > 0 if(get_use_rocpd()) { rocpd::data_processor::get_instance().flush(); } +#endif } //======================================================================================// diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/amd_smi.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/amd_smi.cpp index f52c7cd1d8..f27f236fb1 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/amd_smi.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/amd_smi.cpp @@ -27,6 +27,8 @@ // THE SOFTWARE. #include "core/agent.hpp" +#include "core/trace_cache/cache_manager.hpp" +#include "core/trace_cache/cache_utility.hpp" #if defined(NDEBUG) # undef NDEBUG #endif @@ -41,6 +43,7 @@ #include "core/perfetto.hpp" #include "core/rocpd/data_processor.hpp" #include "core/state.hpp" +#include "core/trace_cache/metadata_registry.hpp" #include "library/amd_smi.hpp" #include "library/runtime.hpp" #include "library/thread_info.hpp" @@ -85,34 +88,31 @@ get_data_processor() } void -rocpd_initialize_category() +metadata_initialize_category() { - get_data_processor().insert_category(ROCPROFSYS_CATEGORY_AMD_SMI, - trait::name::value); + trace_cache::get_metadata_registry().add_string( + trait::name::value); } void -rocpd_initialize_smi_tracks() +metadata_initialize_smi_tracks() { - auto& data_processor = get_data_processor(); - auto& n_info = node_info::get_instance(); - const auto thread_id = std::nullopt; // Internal thread ID for amd-smi + const auto thread_id = std::nullopt; - data_processor.insert_track(trait::name::value, n_info.id, - getpid(), thread_id); - data_processor.insert_track(trait::name::value, n_info.id, - getpid(), thread_id); - data_processor.insert_track(trait::name::value, n_info.id, - getpid(), thread_id); - data_processor.insert_track(trait::name::value, - n_info.id, getpid(), thread_id); + trace_cache::get_metadata_registry().add_track( + { trait::name::value, thread_id, "{}" }); + trace_cache::get_metadata_registry().add_track( + { trait::name::value, thread_id, "{}" }); + trace_cache::get_metadata_registry().add_track( + { trait::name::value, thread_id, "{}" }); + trace_cache::get_metadata_registry().add_track( + { trait::name::value, thread_id, "{}" }); } void -rocpd_initialize_smi_pmc(size_t gpu_id) +metadata_initialize_smi_pmc(size_t gpu_id) { - auto& data_processor = get_data_processor(); - // find the proper values for a following definitions + // TODO: Find the proper values for a following definitions size_t EVENT_CODE = 0; size_t INSTANCE_ID = 0; const char* LONG_DESCRIPTION = ""; @@ -121,34 +121,33 @@ rocpd_initialize_smi_pmc(size_t gpu_id) const char* EXPRESSION = ""; const char* CELSIUS_DEGREES = "\u00B0C"; auto ni = node_info::get_instance(); - const auto* TARGET_ARCH = "GPU"; + const char* TARGET_ARCH = "GPU"; - auto& _agent_manager = agent_manager::get_instance(); - auto base_id = _agent_manager.get_agent_by_id(gpu_id, agent_type::GPU).base_id; + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + trait::name::value, "Busy", + trait::name::description, LONG_DESCRIPTION, + COMPONENT, trace_cache::PERCENTAGE, rocprofsys::trace_cache::ABSOLUTE, BLOCK, + EXPRESSION, 0, 0, "{}" }); - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - trait::name::value, "Busy", - trait::name::description, LONG_DESCRIPTION, COMPONENT, - "%", "ABS", BLOCK, EXPRESSION, 0, 0); + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + trait::name::value, "Temp", + trait::name::description, LONG_DESCRIPTION, COMPONENT, + CELSIUS_DEGREES, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - trait::name::value, "Temp", - trait::name::description, LONG_DESCRIPTION, COMPONENT, - CELSIUS_DEGREES, "ABS", BLOCK, EXPRESSION, 0, 0); + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + trait::name::value, "Pow", + trait::name::description, LONG_DESCRIPTION, COMPONENT, + "W", rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - trait::name::value, "Pow", - trait::name::description, LONG_DESCRIPTION, COMPONENT, - "w", "ABS", BLOCK, EXPRESSION, 0, 0); - - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - trait::name::value, "MemUsg", - trait::name::description, LONG_DESCRIPTION, - COMPONENT, "MB", "ABS", BLOCK, EXPRESSION, 0, 0); + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::GPU, gpu_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + trait::name::value, "MemUsg", + trait::name::description, LONG_DESCRIPTION, + COMPONENT, tim::units::mem_repr(tim::units::megabyte), + rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); } void @@ -159,10 +158,14 @@ rocpd_process_smi_pmc_events(const uint32_t device_id, const amd_smi::settings& if(!(settings.busy || settings.temp || settings.power || settings.mem_usage)) return; auto& data_processor = get_data_processor(); - auto event_id = data_processor.insert_event(ROCPROFSYS_CATEGORY_AMD_SMI, 0, 0, 0); + + const auto* _name = trait::name::value; + auto name_primary_key = data_processor.insert_string(_name); + auto event_id = data_processor.insert_event(name_primary_key, 0, 0, 0); auto& _agent_manager = agent_manager::get_instance(); - auto base_id = _agent_manager.get_agent_by_id(device_id, agent_type::GPU).base_id; + auto base_id = + _agent_manager.get_agent_by_type_index(device_id, agent_type::GPU).base_id; auto insert_event_and_sample = [&](bool enabled, const char* name, double value) { if(!enabled) return; @@ -392,10 +395,12 @@ config() for(auto itr : data::device_list) data::get_initial().at(itr).sample(itr); - if(get_use_rocpd()) + metadata_initialize_category(); + metadata_initialize_smi_tracks(); + + for(const auto& _dev_id : data::device_list) { - rocpd_initialize_category(); - rocpd_initialize_smi_tracks(); + metadata_initialize_smi_pmc(_dev_id); } } @@ -483,11 +488,6 @@ data::post_process(uint32_t _dev_id) auto use_perfetto = get_use_perfetto(); auto use_rocpd = get_use_rocpd(); - if(use_rocpd) - { - rocpd_initialize_smi_pmc(_dev_id); - } - for(auto& itr : _amd_smi) { using counter_track = perfetto_counter_track; @@ -788,6 +788,7 @@ setup() is_initialized() = true; data::setup(); + } catch(std::runtime_error& _e) { ROCPROFSYS_VERBOSE(0, "Exception thrown when initializing amd-smi: %s\n", diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.cpp index 3bcc744db4..74477c42f6 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.cpp @@ -23,6 +23,7 @@ #include "library/components/backtrace_metrics.hpp" #include "core/agent.hpp" #include "core/agent_manager.hpp" +#include "core/common.hpp" #include "core/components/fwd.hpp" #include "core/config.hpp" #include "core/debug.hpp" @@ -167,7 +168,141 @@ get_enabled(tim::type_list) (_v.set(_n++, trait::runtime_enabled::get()), ...); return _v; } + +void +rocpd_init_categories() +{ + static bool _is_initialized = false; + if(_is_initialized) return; + + auto& data_processor = get_data_processor(); + + data_processor.insert_string(trait::name::value); + data_processor.insert_string(trait::name::value); + data_processor.insert_string(trait::name::value); + data_processor.insert_string(trait::name::value); + data_processor.insert_string(trait::name::value); + + _is_initialized = true; +} + +template +void +rocpd_init_tracks(int64_t _tid) +{ + auto& data_processor = get_data_processor(); + auto& n_info = node_info::get_instance(); + const auto& t_info = thread_info::get(_tid, SequentTID); + auto _tid_name = JOIN("", '[', _tid, ']'); + + auto thread_idx = data_processor.insert_thread_info( + n_info.id, getppid(), getpid(), t_info->index_data->system_value, + JOIN(" ", "Thread", _tid).c_str(), t_info->get_start(), t_info->get_stop(), "{}"); + + if constexpr(std::is_same_v) + { + // Initialize hw_counter_tracks and create one track for each hardware counter + auto _hw_cnt_labels = *get_papi_labels(_tid); + for(auto& itr : _hw_cnt_labels) + { + std::string _desc = tim::papi::get_event_info(itr).short_descr; + if(_desc.empty()) _desc = itr; + ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str()); + + std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)"); + data_processor.insert_track(track_name.c_str(), n_info.id, getpid(), + thread_idx, "{}"); + } + } + else + data_processor.insert_track( + JOIN('_', trait::name::value, _tid_name).c_str(), n_info.id, + getpid(), thread_idx, "{}"); +} + +template +void +rocpd_initialize_backtrace_metrics_pmc(size_t dev_id, const char* units, int64_t _tid) +{ + auto& data_processor = get_data_processor(); + auto _tid_name = JOIN("", '[', _tid, ']'); + + size_t EVENT_CODE = 0; + size_t INSTANCE_ID = 0; + const char* LONG_DESCRIPTION = ""; + const char* COMPONENT = ""; + const char* BLOCK = ""; + const char* EXPRESSION = ""; + auto ni = node_info::get_instance(); + const auto TARGET_ARCH = "CPU"; + + auto& agent_mngr = agent_manager::get_instance(); + auto base_id = agent_mngr.get_agent_by_id(dev_id, agent_type::CPU).base_id; + + if constexpr(std::is_same_v) + { + auto _hw_cnt_labels = *get_papi_labels(_tid); + for(auto& itr : _hw_cnt_labels) + { + std::string _desc = tim::papi::get_event_info(itr).short_descr; + if(_desc.empty()) _desc = itr; + ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str()); + + std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)"); + + data_processor.insert_pmc_description( + ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + track_name.c_str(), trait::name::value, + trait::name::description, LONG_DESCRIPTION, COMPONENT, units, + "ABS", BLOCK, EXPRESSION, 0, 0); + } + } + else + data_processor.insert_pmc_description( + ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + JOIN("_", trait::name::value, _tid_name).c_str(), + trait::name::value, trait::name::description, + LONG_DESCRIPTION, COMPONENT, units, "ABS", BLOCK, EXPRESSION, 0, 0); +} + +template +void +rocpd_process_backtrace_metrics_events(const uint32_t device_id, uint64_t timestamp, + Value value, int64_t _tid) +{ + auto& data_processor = get_data_processor(); + auto _tid_name = JOIN("", '[', _tid, ']'); + + auto string_primary_key = data_processor.insert_string(trait::name::value); + auto event_id = data_processor.insert_event(string_primary_key, 0, 0, 0); + auto& agent_mngr = agent_manager::get_instance(); + auto base_id = agent_mngr.get_agent_by_id(device_id, agent_type::CPU).base_id; + + auto insert_event_and_sample = [&](const char* _name, double _value) { + data_processor.insert_pmc_event(event_id, base_id, _name, _value); + data_processor.insert_sample(_name, timestamp, event_id); + }; + + if constexpr(std::is_same_v) + { + auto _hw_cnt_labels = *get_papi_labels(_tid); + const auto& hw_counters = + static_cast(value); + for(size_t i = 0; i < _hw_cnt_labels.size() && i < hw_counters.size(); ++i) + { + std::string _desc = tim::papi::get_event_info(_hw_cnt_labels[i]).short_descr; + if(_desc.empty()) _desc = _hw_cnt_labels[i]; + std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)"); + + insert_event_and_sample(track_name.c_str(), hw_counters.at(i)); + } + } + else + insert_event_and_sample( + JOIN("_", trait::name::value, _tid_name).c_str(), value); +} } // namespace + void backtrace_metrics::sample(int) { @@ -337,146 +472,6 @@ backtrace_metrics::fini_perfetto(int64_t _tid, valid_array_t _valid) } } -void -rocpd_init_categories() -{ - static bool _is_initialized = false; - if(_is_initialized) return; - - get_data_processor().insert_category( - category_enum_id::value, - trait::name::value); - get_data_processor().insert_category( - category_enum_id::value, - trait::name::value); - get_data_processor().insert_category( - category_enum_id::value, - trait::name::value); - get_data_processor().insert_category( - category_enum_id::value, - trait::name::value); - get_data_processor().insert_category( - category_enum_id::value, - trait::name::value); - - _is_initialized = true; -} - -template -void -rocpd_init_tracks(int64_t _tid) -{ - auto& data_processor = get_data_processor(); - auto& n_info = node_info::get_instance(); - const auto& t_info = thread_info::get(_tid, SequentTID); - auto _tid_name = JOIN("", '[', _tid, ']'); - - auto thread_idx = data_processor.insert_thread_info( - n_info.id, getppid(), getpid(), t_info->index_data->system_value, - JOIN(" ", "Thread", _tid).c_str(), t_info->get_start(), t_info->get_stop(), "{}"); - - if constexpr(std::is_same_v) - { - // Initialize hw_counter_tracks and create one track for each hardware counter - auto _hw_cnt_labels = *get_papi_labels(_tid); - for(auto& itr : _hw_cnt_labels) - { - std::string _desc = tim::papi::get_event_info(itr).short_descr; - if(_desc.empty()) _desc = itr; - ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str()); - - std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)"); - data_processor.insert_track(track_name.c_str(), n_info.id, getpid(), - thread_idx, "{}"); - } - } - else - data_processor.insert_track( - JOIN('_', trait::name::value, _tid_name).c_str(), n_info.id, - getpid(), thread_idx, "{}"); -} - -template -void -rocpd_initialize_backtrace_metrics_pmc(size_t dev_id, const char* units, int64_t _tid) -{ - auto& data_processor = get_data_processor(); - auto _tid_name = JOIN("", '[', _tid, ']'); - - size_t EVENT_CODE = 0; - size_t INSTANCE_ID = 0; - const char* LONG_DESCRIPTION = ""; - const char* COMPONENT = ""; - const char* BLOCK = ""; - const char* EXPRESSION = ""; - auto ni = node_info::get_instance(); - const auto* TARGET_ARCH = "CPU"; - - auto& _agent_manager = agent_manager::get_instance(); - auto _base_id = _agent_manager.get_agent_by_id(dev_id, agent_type::CPU).base_id; - - if constexpr(std::is_same_v) - { - auto _hw_cnt_labels = *get_papi_labels(_tid); - for(auto& itr : _hw_cnt_labels) - { - std::string _desc = tim::papi::get_event_info(itr).short_descr; - if(_desc.empty()) _desc = itr; - ROCPROFSYS_CI_THROW(_desc.empty(), "Empty description for %s\n", itr.c_str()); - - std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)"); - - data_processor.insert_pmc_description( - ni.id, getpid(), _base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - track_name.c_str(), trait::name::value, - trait::name::description, LONG_DESCRIPTION, COMPONENT, units, - "ABS", BLOCK, EXPRESSION, 0, 0); - } - } - else - data_processor.insert_pmc_description( - ni.id, getpid(), _base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - JOIN("_", trait::name::value, _tid_name).c_str(), - trait::name::value, trait::name::description, - LONG_DESCRIPTION, COMPONENT, units, "ABS", BLOCK, EXPRESSION, 0, 0); -} - -template -void -rocpd_process_backtrace_metrics_events(const uint32_t device_id, uint64_t timestamp, - Value value, int64_t _tid) -{ - auto& data_processor = get_data_processor(); - auto _tid_name = JOIN("", '[', _tid, ']'); - auto event_id = - data_processor.insert_event(category_enum_id::value, 0, 0, 0); - auto& agent_mngr = agent_manager::get_instance(); - auto base_id = agent_mngr.get_agent_by_id(device_id, agent_type::CPU).base_id; - - auto insert_event_and_sample = [&](const char* name, double _value) { - data_processor.insert_pmc_event(event_id, base_id, name, _value); - data_processor.insert_sample(name, timestamp, event_id); - }; - - if constexpr(std::is_same_v) - { - auto _hw_cnt_labels = *get_papi_labels(_tid); - const auto& _hw_counters = - static_cast(value); - for(size_t i = 0; i < _hw_cnt_labels.size() && i < _hw_counters.size(); ++i) - { - std::string _desc = tim::papi::get_event_info(_hw_cnt_labels[i]).short_descr; - if(_desc.empty()) _desc = _hw_cnt_labels[i]; - std::string track_name = JOIN(' ', "Thread", _desc, _tid_name, "(S)"); - - insert_event_and_sample(track_name.c_str(), _hw_counters.at(i)); - } - } - else - insert_event_and_sample( - JOIN("_", trait::name::value, _tid_name).c_str(), value); -} - void backtrace_metrics::init_rocpd(int64_t _tid, valid_array_t _valid) { @@ -641,8 +636,10 @@ backtrace_metrics::post_process_perfetto(int64_t _tid, uint64_t _ts) const } void -backtrace_metrics::post_process_rocpd(int64_t _tid, uint64_t _ts) const +backtrace_metrics::post_process_rocpd([[maybe_unused]] int64_t _tid, + [[maybe_unused]] uint64_t _ts) const { +#if ROCPROFSYS_USE_ROCM > 0 auto is_category_enabled = [&](const auto& _category) { return (*this)(_category); }; if(is_category_enabled(category::thread_cpu_time{})) @@ -675,6 +672,7 @@ backtrace_metrics::post_process_rocpd(int64_t _tid, uint64_t _ts) const hw_counter_data_t>(0, _ts, m_hw_counter, _tid); } +#endif } } // namespace component } // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp index 6625b7ddbe..2d5d0bf432 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp @@ -26,7 +26,8 @@ #include "core/config.hpp" #include "core/node_info.hpp" #include "core/perfetto.hpp" -#include "core/rocpd/data_processor.hpp" +#include "core/trace_cache/cache_manager.hpp" +#include "core/trace_cache/sample_type.hpp" #include "library/tracing.hpp" #include @@ -76,41 +77,25 @@ write_perfetto_counter_track(uint64_t _val) namespace { -rocpd::data_processor& -get_data_processor() -{ - return rocpd::data_processor::get_instance(); -} - void -rocpd_initialize_comm_data_categories() +metadata_initialize_comm_data_categories() { static bool _is_initialized = false; if(_is_initialized) return; - get_data_processor().insert_category(category_enum_id::value, - trait::name::value); -#if defined(ROCPROFSYS_USE_MPI) - get_data_processor().insert_category(category_enum_id::value, - trait::name::value); -#endif -#if defined(ROCPROFSYS_USE_RCCL) - get_data_processor().insert_category(category_enum_id::value, - trait::name::value); -#endif + trace_cache::get_metadata_registry().add_string( + trait::name::value); + trace_cache::get_metadata_registry().add_string(trait::name::value); + _is_initialized = true; } template void -rocpd_initialize_track() +metadata_initialize_track() { - auto& n_info = node_info::get_instance(); - auto thread_id = std::nullopt; - auto _init_track = [&](const char* label) { - ROCPROFSYS_VERBOSE(3, "INSERT_TRACK label: %s, node ID: %d, Process ID: %d", - label, n_info.id, getpid()); - get_data_processor().insert_track(label, n_info.id, getpid(), thread_id); + auto _init_track = [&](const char* label) { + trace_cache::get_metadata_registry().add_track({ label, std::nullopt, "{}" }); }; static std::once_flag _once{}; @@ -118,9 +103,8 @@ rocpd_initialize_track() } void -rocpd_initialize_comm_data_pmc() +metadata_initialize_comm_data_pmc() { - [[maybe_unused]] auto& data_processor = get_data_processor(); // find the proper values for a following definitions [[maybe_unused]] size_t EVENT_CODE = 0; [[maybe_unused]] size_t INSTANCE_ID = 0; @@ -131,56 +115,28 @@ rocpd_initialize_comm_data_pmc() [[maybe_unused]] constexpr const char* MSG = "bytes"; [[maybe_unused]] constexpr const auto* TARGET_ARCH = "CPU"; auto ni = node_info::get_instance(); - constexpr const auto DEVICE_ID = 0; // Assuming CPU device ID is 0 - - auto& _agent_manager = agent_manager::get_instance(); - [[maybe_unused]] auto base_id = - _agent_manager.get_agent_by_id(DEVICE_ID, agent_type::CPU).base_id; + [[maybe_unused]] constexpr const auto DEVICE_ID = 0; // Assuming CPU device ID is 0 #if defined(ROCPROFSYS_USE_MPI) - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - comm_data::mpi_send::label, "Tracks MPI Send communication data sizes", - trait::name::description, LONG_DESCRIPTION, COMPONENT, MSG, "ABS", - BLOCK, EXPRESSION, 0, 0); - - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - comm_data::mpi_recv::label, "Tracks MPI Receive communication data sizes", - trait::name::description, LONG_DESCRIPTION, COMPONENT, MSG, "ABS", - BLOCK, EXPRESSION, 0, 0); -#endif -#if defined(ROCPROFSYS_USE_RCCL) - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, rccl_send::label, - "Tracks RCCL Send communication data sizes", - trait::name::description, LONG_DESCRIPTION, COMPONENT, MSG, - "ABS", BLOCK, EXPRESSION, 0, 0); - - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, rccl_recv::label, - "Tracks RCCL Receive communication data sizes", - trait::name::description, LONG_DESCRIPTION, COMPONENT, MSG, - "ABS", BLOCK, EXPRESSION, 0, 0); + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::CPU, DEVICE_ID, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + comm_data::mpi_send::label, "Tracks MPI communication data sizes", + trait::name::description, LONG_DESCRIPTION, COMPONENT, MSG, + rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::CPU, DEVICE_ID, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + comm_data::mpi_recv::label, "Tracks MPI communication data sizes", + trait::name::description, LONG_DESCRIPTION, COMPONENT, MSG, + rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); #endif } template void -rocpd_process_cpu_usage_events(const uint32_t device_id, int bytes) +cache_comm_data_events(const uint32_t device_id, int bytes) { - auto& data_processor = get_data_processor(); - auto event_id = data_processor.insert_event( - category_enum_id::value, 0, 0, 0); - auto& agents = agent_manager::get_instance(); - auto agent = agents.get_agent_by_id(device_id, agent_type::CPU); - - auto insert_event_and_sample = [&](const char* name, uint64_t timestamp, - double value) { - data_processor.insert_pmc_event(event_id, agent.device_id, name, value); - data_processor.insert_sample(name, timestamp, event_id); - }; + auto agent = agents.get_agent_by_type_index(device_id, agent_type::CPU); static std::mutex _mutex{}; static uint64_t value = 0; @@ -190,8 +146,20 @@ rocpd_process_cpu_usage_events(const uint32_t device_id, int bytes) _now = rocprofsys::tracing::now(); bytes = (value += bytes); } + const std::string track_name = Track::label; + const size_t timestamp_ns = _now; + const std::string event_metadata = "{}"; + const size_t stack_id = 0; + const size_t parent_stack_id = 0; + const size_t correlation_id = 0; + const std::string call_stack = "{}"; + const std::string line_info = "{}"; + const size_t agent_handle = agent.handle; - insert_event_and_sample(Track::label, _now, bytes); + trace_cache::get_buffer_storage().store( + trace_cache::entry_type::pmc_event_with_sample, track_name.c_str(), timestamp_ns, + event_metadata.c_str(), stack_id, parent_stack_id, correlation_id, + call_stack.c_str(), line_info.c_str(), agent_handle, track_name.c_str(), value); } } // namespace @@ -199,10 +167,14 @@ rocpd_process_cpu_usage_events(const uint32_t device_id, int bytes) void comm_data::start() { - if(get_use_rocpd()) { - rocpd_initialize_comm_data_categories(); - rocpd_initialize_comm_data_pmc(); + metadata_initialize_comm_data_categories(); + metadata_initialize_comm_data_pmc(); + +#if defined(ROCPROFSYS_USE_MPI) + metadata_initialize_track(); + metadata_initialize_track(); +#endif } } @@ -248,10 +220,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int cou write_perfetto_counter_track(count * _size); - if(get_use_rocpd()) { - rocpd_initialize_track(); - rocpd_process_cpu_usage_events(0, count * _size); + cache_comm_data_events(0, count * _size); } if(rocprofsys::get_use_timemory()) @@ -276,10 +246,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count, if(get_use_perfetto()) write_perfetto_counter_track(count * _size); - if(get_use_rocpd()) { - rocpd_initialize_track(); - rocpd_process_cpu_usage_events(0, count * _size); + cache_comm_data_events(0, count * _size); } if(rocprofsys::get_use_timemory()) @@ -304,10 +272,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int cou if(get_use_perfetto()) write_perfetto_counter_track(count * _size); - if(get_use_rocpd()) { - rocpd_initialize_track(); - rocpd_process_cpu_usage_events(0, count * _size); + cache_comm_data_events(0, count * _size); } if(rocprofsys::get_use_timemory()) @@ -332,10 +298,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count, if(get_use_perfetto()) write_perfetto_counter_track(count * _size); - if(get_use_rocpd()) { - rocpd_initialize_track(); - rocpd_process_cpu_usage_events(0, count * _size); + cache_comm_data_events(0, count * _size); } if(rocprofsys::get_use_timemory()) @@ -360,10 +324,8 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, void*, int count, if(get_use_perfetto()) write_perfetto_counter_track(count * _size); - if(get_use_rocpd()) { - rocpd_initialize_track(); - rocpd_process_cpu_usage_events(0, count * _size); + cache_comm_data_events(0, count * _size); } if(rocprofsys::get_use_timemory()) @@ -389,12 +351,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, void*, write_perfetto_counter_track(count * _size); } - if(get_use_rocpd()) { - rocpd_initialize_track(); - rocpd_initialize_track(); - rocpd_process_cpu_usage_events(0, count * _size); - rocpd_process_cpu_usage_events(0, count * _size); + cache_comm_data_events(0, count * _size); + cache_comm_data_events(0, count * _size); } if(rocprofsys::get_use_timemory()) add(_data, count * _size); @@ -416,10 +375,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen write_perfetto_counter_track(recvcount * _recv_size); } - if(get_use_rocpd()) { - rocpd_process_cpu_usage_events(0, sendcount * _send_size); - rocpd_process_cpu_usage_events(0, recvcount * _send_size); + cache_comm_data_events(0, sendcount * _send_size); + cache_comm_data_events(0, recvcount * _send_size); } if(rocprofsys::get_use_timemory()) @@ -467,10 +425,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen write_perfetto_counter_track(recvcount * _recv_size); } - if(get_use_rocpd()) { - rocpd_process_cpu_usage_events(0, sendcount * _send_size); - rocpd_process_cpu_usage_events(0, recvcount * _send_size); + cache_comm_data_events(0, sendcount * _send_size); + cache_comm_data_events(0, recvcount * _send_size); } if(rocprofsys::get_use_timemory()) @@ -501,10 +458,9 @@ comm_data::audit(const gotcha_data& _data, audit::incoming, const void*, int sen write_perfetto_counter_track(recvcount * _recv_size); } - if(get_use_rocpd()) { - rocpd_process_cpu_usage_events(0, sendcount * _send_size); - rocpd_process_cpu_usage_events(0, recvcount * _recv_size); + cache_comm_data_events(0, sendcount * _send_size); + cache_comm_data_events(0, recvcount * _recv_size); } if(rocprofsys::get_use_timemory()) diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/cpu_freq.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/cpu_freq.cpp index 55d84be6ef..7c0e1ac20e 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/cpu_freq.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/cpu_freq.cpp @@ -29,6 +29,8 @@ #include "core/node_info.hpp" #include "core/perfetto.hpp" #include "core/rocpd/data_processor.hpp" +#include "core/timemory.hpp" +#include "core/trace_cache/cache_manager.hpp" #include "library/components/cpu_freq.hpp" #include "library/thread_info.hpp" @@ -91,54 +93,45 @@ get_data_processor() } void -rocpd_initialize_cpu_freq_category() +metadata_initialize_cpu_freq_category() { - get_data_processor().insert_category(ROCPROFSYS_CATEGORY_CPU_FREQ, - trait::name::value); + trace_cache::get_metadata_registry().add_string( + trait::name::value); } void -rocpd_initialize_cpu_freq_tracks() +metadata_initialize_cpu_freq_tracks() { - auto& data_processor = get_data_processor(); - auto& n_info = node_info::get_instance(); - const auto thread_idx = std::nullopt; // Internal thread ID for cpu-freq - do_for_enabled_cpus([&](size_t cpu_id) { - data_processor.insert_track( - get_cpu_freq_track_name(cpu_id).c_str(), n_info.id, - getpid(), thread_idx); + trace_cache::get_metadata_registry().add_track( + { get_cpu_freq_track_name(cpu_id).c_str(), std::nullopt, + "{}" }); }); } void -rocpd_initialize_cpu_usage_tracks() +metadata_initialize_cpu_usage_tracks() { - auto& data_processor = get_data_processor(); - auto& n_info = node_info::get_instance(); - const auto thread_idx = std::nullopt; // Internal thread ID for cpu-freq - - data_processor.insert_track(trait::name::value, n_info.id, - getpid(), thread_idx); - data_processor.insert_track(trait::name::value, n_info.id, - getpid(), thread_idx); - data_processor.insert_track(trait::name::value, n_info.id, - getpid(), thread_idx); - data_processor.insert_track(trait::name::value, - n_info.id, getpid(), thread_idx); - data_processor.insert_track(trait::name::value, - n_info.id, getpid(), thread_idx); - data_processor.insert_track(trait::name::value, - n_info.id, getpid(), thread_idx); - data_processor.insert_track(trait::name::value, - n_info.id, getpid(), thread_idx); + trace_cache::get_metadata_registry().add_track( + { trait::name::value, std::nullopt, "{}" }); + trace_cache::get_metadata_registry().add_track( + { trait::name::value, std::nullopt, "{}" }); + trace_cache::get_metadata_registry().add_track( + { trait::name::value, std::nullopt, "{}" }); + trace_cache::get_metadata_registry().add_track( + { trait::name::value, std::nullopt, "{}" }); + trace_cache::get_metadata_registry().add_track( + { trait::name::value, std::nullopt, "{}" }); + trace_cache::get_metadata_registry().add_track( + { trait::name::value, std::nullopt, "{}" }); + trace_cache::get_metadata_registry().add_track( + { trait::name::value, std::nullopt, "{}" }); } void -rocpd_initialize_cpu_freq_pmc(size_t dev_id) +metadata_initialize_cpu_freq_pmc(size_t dev_id) { - auto& data_processor = get_data_processor(); - // find the proper values for a following definitions + // TODO: Find the proper values for a following definitions size_t EVENT_CODE = 0; size_t INSTANCE_ID = 0; const char* LONG_DESCRIPTION = ""; @@ -150,58 +143,56 @@ rocpd_initialize_cpu_freq_pmc(size_t dev_id) auto ni = node_info::get_instance(); const auto* TARGET_ARCH = "CPU"; - auto& _agent_manager = agent_manager::get_instance(); - auto base_id = _agent_manager.get_agent_by_id(dev_id, agent_type::CPU).base_id; - do_for_enabled_cpus([&](size_t cpu_id) { - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - get_cpu_freq_track_name(cpu_id).c_str(), "Frequency", - trait::name::description, LONG_DESCRIPTION, COMPONENT, - component::cpu_freq::display_unit().c_str(), "ABS", BLOCK, EXPRESSION, 0, 0); + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + get_cpu_freq_track_name(cpu_id).c_str(), "Frequency", + trait::name::description, LONG_DESCRIPTION, COMPONENT, + component::cpu_freq::display_unit().c_str(), + rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); }); - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - trait::name::value, "Memory Usage", - trait::name::description, LONG_DESCRIPTION, COMPONENT, - MEMORY, "ABS", BLOCK, EXPRESSION, 0, 0); + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + trait::name::value, "Memory Usage", + trait::name::description, LONG_DESCRIPTION, COMPONENT, + MEMORY, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - trait::name::value, "Virtual Memory Usage", - trait::name::description, LONG_DESCRIPTION, COMPONENT, - MEMORY, "ABS", BLOCK, EXPRESSION, 0, 0); + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + trait::name::value, "Virtual Memory Usage", + trait::name::description, LONG_DESCRIPTION, COMPONENT, + MEMORY, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - trait::name::value, "Peak Memory", - trait::name::description, LONG_DESCRIPTION, COMPONENT, - MEMORY, "ABS", BLOCK, EXPRESSION, 0, 0); + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + trait::name::value, "Peak Memory", + trait::name::description, LONG_DESCRIPTION, COMPONENT, + MEMORY, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - trait::name::value, "Context Switches", - trait::name::description, LONG_DESCRIPTION, - COMPONENT, "", "ABS", BLOCK, EXPRESSION, 0, 0); + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + trait::name::value, "Context Switches", + trait::name::description, LONG_DESCRIPTION, + COMPONENT, "", rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - trait::name::value, "Page Faults", - trait::name::description, LONG_DESCRIPTION, - COMPONENT, "", "ABS", BLOCK, EXPRESSION, 0, 0); + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + trait::name::value, "Page Faults", + trait::name::description, LONG_DESCRIPTION, + COMPONENT, "", rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - trait::name::value, "User Time", - trait::name::description, LONG_DESCRIPTION, - COMPONENT, TIME, "ABS", BLOCK, EXPRESSION, 0, 0); + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + trait::name::value, "User Time", + trait::name::description, LONG_DESCRIPTION, + COMPONENT, TIME, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); - data_processor.insert_pmc_description( - ni.id, getpid(), base_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, - trait::name::value, "Kernel Time", - trait::name::description, LONG_DESCRIPTION, - COMPONENT, TIME, "ABS", BLOCK, EXPRESSION, 0, 0); + trace_cache::get_metadata_registry().add_pmc_info( + { agent_type::CPU, dev_id, TARGET_ARCH, EVENT_CODE, INSTANCE_ID, + trait::name::value, "Kernel Time", + trait::name::description, LONG_DESCRIPTION, + COMPONENT, TIME, rocprofsys::trace_cache::ABSOLUTE, BLOCK, EXPRESSION, 0, 0 }); } void @@ -211,11 +202,13 @@ rocpd_process_cpu_usage_events(const uint32_t device_id, uint64_t timestamp, double context_switch, double page_fault, double user_time, double kernel_time) { - auto& data_processor = get_data_processor(); - auto event_id = data_processor.insert_event(ROCPROFSYS_CATEGORY_CPU_FREQ, 0, 0, 0); + auto& data_processor = get_data_processor(); + const auto* _name = trait::name::value; + auto name_primary_key = data_processor.insert_string(_name); + auto event_id = data_processor.insert_event(name_primary_key, 0, 0, 0); auto& agent_mngr = agent_manager::get_instance(); - auto base_id = agent_mngr.get_agent_by_id(device_id, agent_type::CPU).base_id; + auto base_id = agent_mngr.get_agent_by_type_index(device_id, agent_type::CPU).base_id; auto insert_event_and_sample = [&](const char* name, double value) { data_processor.insert_pmc_event(event_id, base_id, name, value); @@ -258,6 +251,21 @@ setup() category::process_page_fault, category::process_user_mode_time, category::process_kernel_mode_time>{}); } + + metadata_initialize_cpu_freq_category(); + metadata_initialize_cpu_usage_tracks(); + metadata_initialize_cpu_freq_tracks(); + + // `get_enabled_cpus()` returns the number of cores enabled for monitoring but + // the actual device_id is 0, since there is a single device available. And + // the agents seems to be assigned per device basis not per core. + // TODO: `get_enabled_cpus()` should be fixed in the future to align with GPU + // implementation. + auto cpu_agents = agent_manager::get_instance().get_agents_by_type(agent_type::CPU); + for(auto& agent : cpu_agents) + { + metadata_initialize_cpu_freq_pmc(agent->device_id); + } } void @@ -343,25 +351,6 @@ post_process() auto& enabled_cpus = component::cpu_freq::get_enabled_cpus(); - if(get_use_rocpd()) - { - rocpd_initialize_cpu_freq_category(); - rocpd_initialize_cpu_usage_tracks(); - rocpd_initialize_cpu_freq_tracks(); - - // `get_enabled_cpus()` returns the number of cores enabled for monitoring but the - // actually device_id is 0, since there is a single device available. And the - // agents seems to be assigned per device basis not per core. - // TODO: `get_enabled_cpus()` should be fixed in the future to align with GPU - // implementation. - auto cpu_agents = - agent_manager::get_instance().get_agents_by_type(agent_type::CPU); - for(auto& agent : cpu_agents) - { - rocpd_initialize_cpu_freq_pmc(agent->device_id); - } - } - auto _process_frequencies = [](size_t _idx, size_t _offset) { using freq_track = perfetto_counter_track; diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/kokkosp.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/kokkosp.cpp index b8883b3f08..23cb895356 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/kokkosp.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/kokkosp.cpp @@ -31,10 +31,12 @@ #include "core/defines.hpp" #include "core/node_info.hpp" #include "core/perfetto.hpp" -#include "core/rocpd/data_processor.hpp" #include "core/rocpd/json.hpp" +#include "core/trace_cache/cache_manager.hpp" +#include "core/trace_cache/sample_type.hpp" #include "library/components/category_region.hpp" #include "library/runtime.hpp" +#include #include #include @@ -157,47 +159,41 @@ violates_name_rules(Arg&& _arg, Args&&... _args) namespace { -rocprofsys::rocpd::data_processor& -get_data_processor() -{ - return rocprofsys::rocpd::data_processor::get_instance(); -} - void -rocpd_initialize_kokkos_category() +metadata_initialize_kokkos_category() { - get_data_processor().insert_category( - rocprofsys::category_enum_id::value, + rocprofsys::trace_cache::get_metadata_registry().add_string( rocprofsys::trait::name::value); } void -rocpd_initialize_kokkos_track() +metadata_initialize_kokkos_track() { - auto& data_processor = get_data_processor(); - auto& n_info = rocprofsys::node_info::get_instance(); - auto thread_id = std::nullopt; - - data_processor.insert_track(rocprofsys::trait::name::value, - n_info.id, getpid(), thread_id); + rocprofsys::trace_cache::get_metadata_registry().add_track( + { rocprofsys::trait::name::value, std::nullopt, "{}" }); } void -rocpd_process_kokkos_event(const char* name, const char* event_type, const char* target, - uint64_t timestamp_ns) +cache_kokkos_event(const char* name, const char* event_type, const char* target, + uint64_t timestamp_ns) { - auto& data_processor = get_data_processor(); - auto event_metadata = rocpd::json::create(); + auto event_metadata = rocpd::json::create(); event_metadata->set("name", name); event_metadata->set("event_type", event_type); event_metadata->set("target", target); - auto event_id = data_processor.insert_event( - rocprofsys::category_enum_id::value, 0, 0, 0, "{}", "{}", - event_metadata->to_string().c_str()); - data_processor.insert_sample(rocprofsys::trait::name::value, - timestamp_ns, event_id, "{}"); + const size_t stack_id = 0; + const size_t parent_stack_id = 0; + const size_t correlation_id = 0; + const char* call_stack = "{}"; + const char* line_info = "{}"; + + rocprofsys::trace_cache::get_buffer_storage().store( + rocprofsys::trace_cache::entry_type::in_time_sample, + rocprofsys::trait::name::value, timestamp_ns, + event_metadata->to_string().c_str(), stack_id, parent_stack_id, correlation_id, + call_stack, line_info); } } // namespace @@ -308,11 +304,8 @@ extern "C" rocprofsys_init_hidden(_mode.c_str(), false, _arg0.c_str()); rocprofsys_push_trace_hidden("kokkos_main"); - if(rocprofsys::get_use_rocpd()) - { - rocpd_initialize_kokkos_category(); - rocpd_initialize_kokkos_track(); - } + metadata_initialize_kokkos_category(); + metadata_initialize_kokkos_track(); } setup_kernel_logger(); @@ -619,12 +612,8 @@ extern "C" kokkosp::profiler_t{ _name }.mark(); } - if(rocprofsys::config::get_use_rocpd()) - { - rocpd_process_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(), - "[dual_view_sync]", - (is_device) ? "device" : "host", timestamp); - } + cache_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(), "[dual_view_sync]", + (is_device) ? "device" : "host", timestamp); } void kokkosp_dual_view_modify(const char* label, const void* const, bool is_device) @@ -648,12 +637,8 @@ extern "C" kokkosp::profiler_t{ _name }.mark(); } - if(rocprofsys::config::get_use_rocpd()) - { - rocpd_process_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(), - "[dual_view_modify]", - (is_device) ? "device" : "host", timestamp); - } + cache_kokkos_event(JOIN(" ", _kp_prefix, label).c_str(), "[dual_view_modify]", + (is_device) ? "device" : "host", timestamp); } //----------------------------------------------------------------------------------// diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp index 647d43ab5d..097301ee9c 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp @@ -20,24 +20,31 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. -#include "library/rocprofiler-sdk.hpp" +#include "core/rocprofiler-sdk.hpp" #include "api.hpp" #include "common/synchronized.hpp" +#include "core/common.hpp" #include "core/config.hpp" #include "core/containers/stable_vector.hpp" #include "core/debug.hpp" #include "core/gpu.hpp" #include "core/perfetto.hpp" -#include "core/rocprofiler-sdk.hpp" +#include "core/rocpd/json.hpp" #include "core/state.hpp" +#include "core/trace_cache/buffer_storage.hpp" +#include "core/trace_cache/cache_manager.hpp" +#include "core/trace_cache/metadata_registry.hpp" +#include "core/trace_cache/sample_type.hpp" #include "library/amd_smi.hpp" #include "library/components/category_region.hpp" +#include "library/rocprofiler-sdk.hpp" #include "library/rocprofiler-sdk/counters.hpp" #include "library/rocprofiler-sdk/fwd.hpp" #include "library/rocprofiler-sdk/rccl.hpp" #include "library/thread_info.hpp" #include "library/tracing.hpp" +#include #include #include #include @@ -247,7 +254,7 @@ create_agent_profile(rocprofiler_agent_id_t agent_id, ROCPROFSYS_ABORT_F( "Unable to find all counters for agent %i (gpu-%li, %s) in %s. Found: %s\n", tool_agent_v->agent->node_id, tool_agent_v->device_id, - tool_agent_v->agent->name, requested_counters.c_str(), + tool_agent_v->agent->name.c_str(), requested_counters.c_str(), found_counters.c_str()); } @@ -270,6 +277,12 @@ get_kernel_symbol_info(uint64_t _kernel_id) return tool_data->get_kernel_symbol_info(_kernel_id); } +const rocprofiler_callback_tracing_code_object_load_data_t* +get_code_object_info(uint64_t _code_object_id) +{ + return tool_data->get_code_object_info(_code_object_id); +} + // Implementation of rocprofiler_callback_tracing_operation_args_cb_t int save_args(rocprofiler_callback_tracing_kind_t /*kind*/, int32_t /*operation*/, @@ -283,6 +296,22 @@ save_args(rocprofiler_callback_tracing_kind_t /*kind*/, int32_t /*operation*/, return 0; } +// Additional implementation of rocprofiler_callback_tracing_operation_args_cb_t +// for iterating through arguments in a callback for rocpd_arg table in database +int +iterate_args_callback(rocprofiler_callback_tracing_kind_t /*kind*/, int32_t /*operation*/, + uint32_t arg_number, const void* const /*arg_value_addr*/, + int32_t /*arg_indirection_count*/, const char* arg_type, + const char* arg_name, const char* arg_value_str, + int32_t /*arg_dereference_count*/, void* data) +{ + auto* _data = static_cast(data); + if(arg_type && arg_name && arg_value_str) + _data->emplace_back( + argument_info{ arg_number, demangle(arg_type), arg_name, arg_value_str }); + return 0; +} + auto& get_marker_pushed_ranges() { @@ -299,6 +328,294 @@ get_marker_started_ranges() return _v; } +template +Tp* +as_pointer(Args&&... _args) +{ + return new Tp{ std::forward(_args)... }; +} + +template +void +consume_args(Tp&&...) +{} + +auto +get_backtrace(std::optional>& _bt_data) +{ + auto backtrace = ::rocpd::json::create(); + if(_bt_data && !_bt_data->empty()) + { + const std::string _unk = "??"; + size_t _bt_cnt = 0; + for(const auto& itr : *_bt_data) + { + auto _linfo = itr.lineinfo.get(); + const auto* _func = (itr.name.empty()) ? &_unk : &itr.name; + const auto* _loc = (_linfo && !_linfo.location.empty()) + ? &_linfo.location + : ((itr.location.empty()) ? &_unk : &itr.location); + auto _line = + (_linfo && _linfo.line > 0) + ? join("", _linfo.line) + : ((itr.lineno == 0) ? std::string{ "?" } : join("", itr.lineno)); + auto _entry = join("", demangle(*_func), " @ ", + join(':', ::basename(_loc->c_str()), _line)); + backtrace->set(join("", "frame#", _bt_cnt++), _entry); + } + } + return backtrace; +} + +template +uint64_t +get_parent_stack_id([[maybe_unused]] const CorrelationIdType& correlation_id) +{ +#if(ROCPROFILER_VERSION >= 700) + if constexpr(std::is_same_v) + { + return correlation_id.ancestor; + } + else + { + return 0; + } +#else + return 0; +#endif +} + +auto +get_extdata(const rocprofiler_callback_tracing_record_t& record) +{ + constexpr auto message_key = "message"; + auto args = callback_arg_array_t{}; + auto extdata = ::rocpd::json::create(); + + rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, 2, &args); + + for(auto [key, val] : args) + { + if(key == message_key) + { + extdata->set(key, val); + } + } + + return extdata; +} + +struct scope_destructor +{ + /// \fn scope_destructor(FuncT&& _fini, InitT&& _init) + /// \tparam FuncT "std::function or void (*)()" + /// \tparam InitT "std::function or void (*)()" + /// \param _fini Function to execute when object is destroyed + /// \param _init Function to execute when object is created (optional) + /// + /// \brief Provides a utility to perform an operation when exiting a scope. + template + scope_destructor(FuncT&& _fini, InitT&& _init = []() {}); + + ~scope_destructor() { m_functor(); } + + // delete copy operations + scope_destructor(const scope_destructor&) = delete; + scope_destructor& operator=(const scope_destructor&) = delete; + + // allow move operations + scope_destructor(scope_destructor&& rhs) noexcept; + scope_destructor& operator=(scope_destructor&& rhs) noexcept; + +private: + std::function m_functor = []() {}; +}; + +template +scope_destructor::scope_destructor(FuncT&& _fini, InitT&& _init) +: m_functor{ std::forward(_fini) } +{ + _init(); +} + +inline scope_destructor::scope_destructor(scope_destructor&& rhs) noexcept +: m_functor{ std::move(rhs.m_functor) } +{ + rhs.m_functor = []() {}; +} + +inline scope_destructor& +scope_destructor::operator=(scope_destructor&& rhs) noexcept +{ + if(this != &rhs) + { + m_functor = std::move(rhs.m_functor); + rhs.m_functor = []() {}; + } + return *this; +} + +using kernel_rename_stack_t = std::stack; + +thread_local auto thread_dispatch_rename = as_pointer(); +thread_local auto thread_dispatch_rename_dtor = scope_destructor{ []() { + delete thread_dispatch_rename; + thread_dispatch_rename = nullptr; +} }; + +template +void +cache_category() +{ + trace_cache::get_metadata_registry().add_string(trait::name::value); +} + +void +cache_add_thread_info(uint64_t tid) +{ + trace_cache::get_metadata_registry().add_thread_info( + { getppid(), getpid(), tid, 0, 0, "{}" }); +} + +void +cache_add_track(const char* track_name, uint64_t tid) +{ + trace_cache::get_metadata_registry().add_track({ track_name, tid, "{}" }); +} + +size_t +get_mem_copy_dst_address( + [[maybe_unused]] const rocprofiler_buffer_tracing_memory_copy_record_t& record) +{ +#if(ROCPROFILER_VERSION >= 700) + return record.dst_address.value; +#else + return 0; +#endif +} + +size_t +get_mem_copy_src_address( + [[maybe_unused]] const rocprofiler_buffer_tracing_memory_copy_record_t& record) +{ +#if(ROCPROFILER_VERSION >= 700) + return record.src_address.value; +#else + return 0; +#endif +} + +#if(ROCPROFILER_VERSION >= 600) +size_t +get_mem_alloc_address( + [[maybe_unused]] const rocprofiler_buffer_tracing_memory_allocation_record_t& record) +{ +# if(ROCPROFILER_VERSION >= 700) + return record.address.value; +# else + return static_cast(record.address.handle); +# endif +} +#endif + +// clang-format off +void +cache_region(const rocprofiler_callback_tracing_record_t* record, + const rocprofiler_timestamp_t start_timestamp, + const rocprofiler_timestamp_t end_timestamp, const std::string& call_stack, + const std::string& args_str, const std::string& category) + +{ + trace_cache::get_buffer_storage().store( + trace_cache::entry_type::region, + record->thread_id, + static_cast(record->kind), + static_cast(record->operation), + record->correlation_id.internal, + get_parent_stack_id(record->correlation_id), + start_timestamp, + end_timestamp, + call_stack.c_str(), + args_str.c_str(), + category.c_str()); +} + +void +cache_kernel_dispatch(rocprofiler_buffer_tracing_kernel_dispatch_record_t* record) +{ + auto stream_handle = get_stream_id(record).handle; + auto queue_handle = record->dispatch_info.queue_id.handle; + + trace_cache::get_metadata_registry().add_queue(queue_handle); + trace_cache::get_metadata_registry().add_stream(stream_handle); + + trace_cache::get_buffer_storage().store( + trace_cache::entry_type::kernel_dispatch, + record->start_timestamp, + record->end_timestamp, + record->thread_id, + record->dispatch_info.agent_id.handle, + record->dispatch_info.kernel_id, + record->dispatch_info.dispatch_id, + record->dispatch_info.queue_id.handle, + record->correlation_id.internal, + get_parent_stack_id(record->correlation_id), + record->dispatch_info.private_segment_size, + record->dispatch_info.group_segment_size, + record->dispatch_info.workgroup_size.x, + record->dispatch_info.workgroup_size.y, + record->dispatch_info.workgroup_size.z, + record->dispatch_info.grid_size.x, + record->dispatch_info.grid_size.y, + record->dispatch_info.grid_size.z, + stream_handle); +} + +void +cache_memory_copy(rocprofiler_buffer_tracing_memory_copy_record_t* record) +{ + auto stream_handle = get_stream_id(record).handle; + + trace_cache::get_buffer_storage().store( + trace_cache::entry_type::memory_copy, + record->start_timestamp, + record->end_timestamp, + record->thread_id, + record->dst_agent_id.handle, + record->src_agent_id.handle, + static_cast(record->kind), + static_cast(record->operation), + record->bytes, + record->correlation_id.internal, + get_parent_stack_id(record->correlation_id), + get_mem_copy_dst_address(*record), + get_mem_copy_src_address(*record), + stream_handle); +} + +#if (ROCPROFILER_VERSION >= 600) +void +cache_memory_allocation(rocprofiler_buffer_tracing_memory_allocation_record_t* record) +{ + auto stream_handle = get_stream_id(record).handle; + + trace_cache::get_metadata_registry().add_stream(stream_handle); + trace_cache::get_buffer_storage().store( + trace_cache::entry_type::memory_alloc, + record->start_timestamp, + record->end_timestamp, + record->thread_id, + record->agent_id.handle, + static_cast(record->kind), + static_cast(record->operation), + record->allocation_size, + record->correlation_id.internal, + get_parent_stack_id(record->correlation_id), + get_mem_alloc_address(*record), + stream_handle); +} +#endif +// clang-format on template void tool_tracing_callback_start(CategoryT, rocprofiler_callback_tracing_record_t record, @@ -391,7 +708,8 @@ tool_tracing_callback_stop( { ROCPROFSYS_CONDITIONAL_ABORT_F( get_marker_started_ranges().empty(), - "roctxRangeStop does not have corresponding roctxRangeStart on " + "roctxRangeStop does not have corresponding roctxRangeStart " + "on " "this thread"); auto _hash = get_marker_started_ranges().back().first; @@ -473,8 +791,9 @@ tool_tracing_callback_stop( join(':', ::basename(_loc->c_str()), _line)); if(_bt_cnt < 10) { - // Prepend zero for better ordering in UI. Only one zero - // is ever necessary since stack depth is limited to 16. + // Prepend zero for better ordering in UI. Only one + // zero is ever necessary since stack depth is limited + // to 16. tracing::add_perfetto_annotation( ctx, join("", "frame#0", _bt_cnt++), _entry); } @@ -493,6 +812,33 @@ tool_tracing_callback_stop( tracing::add_perfetto_annotation(ctx, "end_ns", _end_ts); }); } + + // Insert callback trace into database + auto args = function_args_t{}; + + rocprofiler_iterate_callback_tracing_kind_operation_args( + record, iterate_args_callback, 2, &args); + + auto call_stack = get_backtrace(_bt_data); + uint64_t _beg_ts = user_data->value; + uint64_t _end_ts = ts; + + { + cache_category(); + cache_add_thread_info(record.thread_id); + std::string args_str; + + std::for_each(args.begin(), args.end(), [&args_str](const argument_info& arg) { + const auto* delimiter = ";;"; + std::stringstream ss; + ss << arg.arg_number << delimiter << arg.arg_type << delimiter << arg.arg_name + << delimiter << arg.arg_value << delimiter; + args_str.append(ss.str()); + }); + + cache_region(&record, _beg_ts, _end_ts, call_stack->to_string(), args_str, + trait::name::value); + } } void @@ -535,6 +881,7 @@ tool_code_object_callback(rocprofiler_callback_tracing_record_t record, _data.emplace_back( code_object_callback_record_t{ ts, record, data_v }); }); + trace_cache::get_metadata_registry().add_code_object(data_v); } else if(record.operation == ROCPROFILER_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER) @@ -545,6 +892,7 @@ tool_code_object_callback(rocprofiler_callback_tracing_record_t record, _data.emplace_back( new kernel_symbol_callback_record_t{ ts, record, data_v }); }); + trace_cache::get_metadata_registry().add_kernel_symbol(data_v); } } return; @@ -635,7 +983,6 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, case ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY: #if(ROCPROFILER_VERSION >= 600) case ROCPROFILER_CALLBACK_TRACING_OMPT: - case ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION: case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION: #endif #if(ROCPROFILER_VERSION >= 700) @@ -646,6 +993,12 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, record.kind); break; } + default: + { + ROCPROFSYS_CI_ABORT(true, "Unhandled callback record: \n\t%s\n", + info.str().c_str()); + break; + } } } else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) @@ -657,8 +1010,11 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, constexpr bool bt_with_signal_frame = true; auto _bt_data = std::optional{}; + auto use_perfetto = + (config::get_use_perfetto() && config::get_perfetto_annotations()); + auto use_rocpd = config::get_use_rocpd(); - if(config::get_use_perfetto() && config::get_perfetto_annotations() && + if((use_perfetto || use_rocpd) && tool_data->backtrace_operations.at(record.kind).count(record.operation) > 0) { auto _backtrace = tim::get_unw_stack= 600) case ROCPROFILER_CALLBACK_TRACING_OMPT: - case ROCPROFILER_CALLBACK_TRACING_MEMORY_ALLOCATION: case ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION: #endif #if(ROCPROFILER_VERSION >= 700) @@ -746,27 +1101,41 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, record.kind); break; } + default: + { + ROCPROFSYS_CI_ABORT(true, "Unhandled callback record\n\t%s\n", + info.str().c_str()); + break; + } } } else if(record.phase == ROCPROFILER_CALLBACK_PHASE_NONE) { - if(record.kind == ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH && - record.operation == ROCPROFILER_KERNEL_DISPATCH_COMPLETE) + switch(record.kind) { - auto* _data = - static_cast( - record.payload); + case ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH: + { + if(record.operation == ROCPROFILER_KERNEL_DISPATCH_COMPLETE) + { + auto* _data = + static_cast( + record.payload); - // save for post-processing - get_kernel_dispatch_timestamps().emplace( - _data->dispatch_info.dispatch_id, - timing_interval{ _data->start_timestamp, _data->end_timestamp }); - } - else - { - ROCPROFSYS_WARNING_F( - 1, "tool_tracing_callback: unhandled PHASE_NONE callback record\n\t%s\n", - info.str().c_str()); + // save for post-processing + get_kernel_dispatch_timestamps().emplace( + _data->dispatch_info.dispatch_id, + timing_interval{ _data->start_timestamp, _data->end_timestamp }); + } + } + break; + default: + { + ROCPROFSYS_WARNING_F(1, + "tool_tracing_callback: unhandled PHASE_NONE " + "callback record\n\t%s\n", + info.str().c_str()); + } + break; } } else @@ -820,10 +1189,20 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/, uint64_t _stream_id = get_stream_id(record).handle; if(_stream_id == 0) { - // kernel is not associated with a HIP stream + // kernel_dispatch is not associated with a HIP stream _group_by_queue = true; } + { + cache_category(); + cache_add_thread_info(record->thread_id); + cache_add_track(JOIN("", "GPU Kernel Dispatch [", _agent->device_id, + "] Queue ", _queue_id.handle) + .c_str(), + record->thread_id); + cache_kernel_dispatch(record); + } + if(get_use_timemory()) { const auto& _tinfo = thread_info::get(record->thread_id, SystemTID); @@ -937,6 +1316,20 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/, _group_by_queue = true; } + { + size_t thread_idx = record->thread_id; + std::string track_name; + + track_name = + JOIN("", "GPU Memory Copy to Agent [", + _dst_agent->logical_node_id, "] Thread ", thread_idx); + + cache_category(); + cache_add_track(track_name.c_str(), record->thread_id); + + cache_memory_copy(record); + } + if(get_use_timemory()) { const auto& _tinfo = thread_info::get(record->thread_id, SystemTID); @@ -1005,6 +1398,26 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/, } } } +#if(ROCPROFILER_VERSION >= 600) + else if(header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION) + { + auto* record = + static_cast( + header->payload); + + { + cache_category(); + cache_add_thread_info(record->thread_id); + cache_memory_allocation(record); + } + } +#endif + else if(header->kind == ROCPROFILER_BUFFER_TRACING_HSA_CORE_API || + header->kind == ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API) + { + // Not handling those buffered events + continue; + } else { ROCPROFSYS_THROW( @@ -1098,10 +1511,10 @@ counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_da ROCPROFSYS_CONDITIONAL_ABORT_F( !_agent, "unable to find tool agent for agent (id=%zu)\n", _agent_id.handle); - ROCPROFSYS_CONDITIONAL_ABORT_F( - !_info, - "unable to find counter info for counter (id=%zu) on agent (id=%zu)\n", - itr.first.handle, _agent_id.handle); + ROCPROFSYS_CONDITIONAL_ABORT_F(!_info, + "unable to find counter info for counter " + "(id=%zu) on agent (id=%zu)\n", + itr.first.handle, _agent_id.handle); auto _dev_id = static_cast(_agent->device_id); @@ -1159,7 +1572,6 @@ void flush() { if(!tool_data) return; - for(auto itr : tool_data->get_buffers()) { if(itr.handle > 0) @@ -1268,6 +1680,24 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data) _data->primary_ctx, ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, nullptr, 0, tool_code_object_callback, _data)); + auto external_corr_id_request_kinds = + std::array{ + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH, + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY, +#if(ROCPROFILER_VERSION >= 600) + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_ALLOCATION +#endif + }; + + // Insert the default stream and queue info to ensure that the default entry is + { + trace_cache::get_metadata_registry().add_stream(0); + trace_cache::get_metadata_registry().add_queue(0); + } + // ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service( + // _data->primary_ctx, ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT, nullptr, 0, + // tool_code_object_callback, _data)); + for(auto itr : { ROCPROFILER_CALLBACK_TRACING_HSA_CORE_API, ROCPROFILER_CALLBACK_TRACING_HSA_AMD_EXT_API, @@ -1296,17 +1726,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data) } } - constexpr auto buffer_size = 8192; - constexpr auto watermark = 7936; - - // Configure external correlation id request service for kernel dispatch - // and memory copy. - - auto external_corr_id_request_kinds = - std::array{ - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH, - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY - }; + constexpr auto buffer_size = 16 * 4096; + constexpr auto watermark = 15 * 4096; ROCPROFILER_CALL(rocprofiler_configure_external_correlation_id_request_service( _data->primary_ctx, external_corr_id_request_kinds.data(), @@ -1334,7 +1755,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data) _data->primary_ctx, ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, nullptr, 0, _data->kernel_dispatch_buffer)); } - + // ROCPROFILER_BUFFER_TRACING_HSA_CORE_API, ///< @see + // ::rocprofiler_hsa_core_api_id_t ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API, if(_buffered_domain.count(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY) > 0) { ROCPROFILER_CALL(rocprofiler_create_buffer( @@ -1347,19 +1769,39 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data) _data->memory_copy_buffer)); } +#if(ROCPROFILER_VERSION >= 600) + if(_buffered_domain.count(ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION) > 0) + { + ROCPROFILER_CALL(rocprofiler_create_buffer( + _data->primary_ctx, buffer_size, watermark, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, tool_tracing_buffered, tool_data, + &_data->memory_alloc_buffer)); + if(_data->memory_alloc_buffer.handle == 0UL) + { + ROCPROFSYS_CI_ABORT(true, "Failed to create memory allocation buffer\n"); + } + auto _ops = + rocprofiler_sdk::get_operations(ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION); + + ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service( + _data->primary_ctx, ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION, nullptr, 0, + _data->memory_alloc_buffer)); + } +#endif + if(!_counter_events.empty()) { for(const auto& itr : _data->gpu_agents) { + const auto& _agent_id = rocprofiler_agent_id_t{ itr.agent->handle }; _data->agent_events.emplace( - itr.agent->id, - create_agent_profile(itr.agent->id, _counter_events, _data)); + _agent_id, create_agent_profile(_agent_id, _counter_events, _data)); } ROCPROFILER_CALL(rocprofiler_create_context(&_data->counter_ctx)); auto _operations = std::array{ - ROCPROFILER_KERNEL_DISPATCH_COMPLETE + ROCPROFILER_KERNEL_DISPATCH_COMPLETE, }; ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service( @@ -1393,8 +1835,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data) } } - // throwaway context for handling the profiler control API. If primary_ctx were used, - // we would get profiler pause callback but never get profiler resume callback + // throwaway context for handling the profiler control API. If primary_ctx were + // used, we would get profiler pause callback but never get profiler resume + // callback { auto _local_ctx = rocprofiler_context_id_t{ 0 }; ROCPROFILER_CALL(rocprofiler_create_context(&_local_ctx)); @@ -1405,8 +1848,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* user_data) if(!is_valid(_data->primary_ctx)) { - // notify rocprofiler that initialization failed and all the contexts, buffers, - // etc. created should be ignored + // notify rocprofiler that initialization failed and all the contexts, + // buffers, etc. created should be ignored return -1; } @@ -1454,13 +1897,7 @@ tool_fini(void* callback_data) void setup() -{ - if(int status = 0; - rocprofiler_is_initialized(&status) == ROCPROFILER_STATUS_SUCCESS && status == 0) - { - ROCPROFILER_CALL(rocprofiler_force_configure(&rocprofiler_configure)); - } -} +{} void shutdown() diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.cpp index 13a2c251a6..95368705a4 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.cpp @@ -91,15 +91,17 @@ get_agent_counter_info(const tool_agent_vec_t& _agents) for(auto itr : _agents) { - ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters( - itr.agent->id, counters_supported_callback, &_data)); + const auto& _agent_id = rocprofiler_agent_id_t{ itr.agent->handle }; - std::sort(_data.at(itr.agent->id).begin(), _data.at(itr.agent->id).end(), + ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters( + _agent_id, counters_supported_callback, &_data)); + + std::sort(_data.at(_agent_id).begin(), _data.at(_agent_id).end(), [](const auto& lhs, const auto& rhs) { return (lhs.id.handle < rhs.id.handle); }); - for(auto& citr : _data.at(itr.agent->id)) + for(auto& citr : _data.at(_agent_id)) { std::sort(citr.dimension_info.begin(), citr.dimension_info.end(), [](const auto& lhs, const auto& rhs) { return (lhs.id < rhs.id); }); @@ -123,38 +125,16 @@ client_data::initialize() buffered_tracing_info = rocprofiler::sdk::get_buffer_tracing_names(); callback_tracing_info = rocprofiler::sdk::get_callback_tracing_names(); - static constexpr auto supported_agent_info_version = ROCPROFILER_AGENT_INFO_VERSION_0; - - rocprofiler_query_available_agents_cb_t iterate_cb = - [](rocprofiler_agent_version_t version, const void** agents_arr, - size_t num_agents, void* user_data) { - ROCPROFSYS_CONDITIONAL_ABORT(version != supported_agent_info_version, - "rocprofiler agent info version != expected " - "agent info version (=%i). value: %i\n", - supported_agent_info_version, version); - - auto _agents_v = std::vector{}; - for(size_t i = 0; i < num_agents; ++i) - { - const auto* _agent = - static_cast(agents_arr[i]); - _agents_v.emplace_back(*_agent); - } - - auto* tool_data_v = as_client_data(user_data); - tool_data_v->set_agents(std::move(_agents_v)); - - return ROCPROFILER_STATUS_SUCCESS; - }; - - ROCPROFILER_CALL(rocprofiler_query_available_agents( - supported_agent_info_version, iterate_cb, sizeof(rocprofiler_agent_t), this)); + set_agents(); } void client_data::initialize_event_info() { - if(agents.empty()) initialize(); + if(agent_manager::get_instance().get_agents().empty()) + { + initialize(); + } if(agent_counter_info.size() != gpu_agents.size()) agent_counter_info = get_agent_counter_info(gpu_agents); @@ -166,14 +146,15 @@ client_data::initialize_event_info() for(const auto& aitr : gpu_agents) { - auto _dev_index = aitr.device_id; - auto _device_qualifier_sym = JOIN("", ":device=", _dev_index); - auto _device_qualifier = + auto _dev_index = aitr.device_id; + const auto& _agent_id = rocprofiler_agent_id_t{ aitr.agent->handle }; + auto _device_qualifier_sym = JOIN("", ":device=", _dev_index); + auto _device_qualifier = tim::hardware_counters::qualifier{ true, static_cast(_dev_index), _device_qualifier_sym, JOIN(" ", "Device", _dev_index) }; - auto _counter_info = agent_counter_info.at(aitr.agent->id); + auto _counter_info = agent_counter_info.at(_agent_id); std::sort(_counter_info.begin(), _counter_info.end(), [](const rocprofiler_tool_counter_info_t& lhs, const rocprofiler_tool_counter_info_t& rhs) { @@ -248,23 +229,20 @@ client_data::initialize_event_info() } void -client_data::set_agents(agent_vec_t&& _agents_v) +client_data::set_agents() { - agents = std::move(_agents_v); + auto& agent_mngr = agent_manager::get_instance(); - std::sort(agents.begin(), agents.end(), - [](const auto& lhs, const auto& rhs) { return lhs.node_id < rhs.node_id; }); + auto fill_agents = [&](agent_type type, std::vector& out) { + const auto& _agents = agent_mngr.get_agents_by_type(type); + for(const auto& agent : _agents) + { + out.emplace_back(tool_agent{ agent->device_type_index, agent.get() }); + } + }; - cpu_agents.clear(); - gpu_agents.clear(); - - for(const auto& itr : agents) - { - if(itr.type == ROCPROFILER_AGENT_TYPE_CPU) - cpu_agents.emplace_back(tool_agent{ cpu_agents.size(), &itr }); - else if(itr.type == ROCPROFILER_AGENT_TYPE_GPU) - gpu_agents.emplace_back(tool_agent{ gpu_agents.size(), &itr }); - } + fill_agents(agent_type::GPU, gpu_agents); + fill_agents(agent_type::CPU, cpu_agents); } } // namespace rocprofiler_sdk } // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.hpp index 28a26b9284..3736f7df07 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.hpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/fwd.hpp @@ -23,6 +23,7 @@ #pragma once #include "common/synchronized.hpp" +#include "core/agent_manager.hpp" #include "core/timemory.hpp" #include @@ -50,6 +51,8 @@ using kernel_symbol_map_t = std::unordered_map; using callback_arg_array_t = std::vector>; +using rocprofsys_agent_t = agent; + struct code_object_callback_record_t { uint64_t timestamp = 0; @@ -88,8 +91,8 @@ struct rocprofiler_tool_counter_info_t : rocprofiler_counter_info_v0_t struct tool_agent { - uint64_t device_id = 0; - const rocprofiler_agent_v0_t* agent = nullptr; + uint64_t device_id = 0; + const rocprofsys_agent_t* agent = nullptr; }; struct timing_interval @@ -98,6 +101,16 @@ struct timing_interval rocprofiler_timestamp_t end = 0; }; +struct argument_info +{ + uint32_t arg_number = 0; + std::string arg_type = {}; + std::string arg_name = {}; + std::string arg_value = {}; +}; + +using function_args_t = std::vector; + using agent_counter_info_map_t = std::unordered_map>; @@ -117,7 +130,7 @@ using backtrace_operation_map_t = struct client_data { - static constexpr size_t num_buffers = 3; + static constexpr size_t num_buffers = 4; static constexpr size_t num_contexts = 2; using buffer_name_info_t = rocprofiler::sdk::buffer_name_info_t; @@ -134,8 +147,8 @@ struct client_data rocprofiler_context_id_t counter_ctx = { 0 }; rocprofiler_buffer_id_t kernel_dispatch_buffer = { 0 }; rocprofiler_buffer_id_t memory_copy_buffer = { 0 }; + rocprofiler_buffer_id_t memory_alloc_buffer = { 0 }; rocprofiler_buffer_id_t counter_collection_buffer = { 0 }; - std::vector agents = {}; std::vector cpu_agents = {}; std::vector gpu_agents = {}; std::vector events_info = {}; @@ -150,14 +163,16 @@ struct client_data void initialize(); void initialize_event_info(); - void set_agents(agent_vec_t&& agents); + void set_agents(); context_id_vec_t get_contexts() const; buffer_id_vec_t get_buffers() const; - const rocprofiler_agent_t* get_agent(rocprofiler_agent_id_t _id) const; + const rocprofsys_agent_t* get_agent(rocprofiler_agent_id_t _id) const; const tool_agent* get_gpu_tool_agent(rocprofiler_agent_id_t id) const; const kernel_symbol_data_t* get_kernel_symbol_info(uint64_t _kernel_id) const; const rocprofiler_tool_counter_info_t* get_tool_counter_info( rocprofiler_agent_id_t _agent_id, rocprofiler_counter_id_t _counter_id) const; + const rocprofiler_callback_tracing_code_object_load_data_t* get_code_object_info( + uint64_t code_object_id) const; }; inline client_data::context_id_vec_t @@ -175,23 +190,24 @@ client_data::get_buffers() const return buffer_id_vec_t{ kernel_dispatch_buffer, memory_copy_buffer, + memory_alloc_buffer, counter_collection_buffer, }; } -inline const rocprofiler_agent_t* +inline const rocprofsys_agent_t* client_data::get_agent(rocprofiler_agent_id_t _id) const { - for(const auto& itr : agents) - if(itr.id == _id) return &itr; - return nullptr; + const auto& agent = agent_manager::get_instance().get_agent_by_handle(_id.handle); + + return &agent; } inline const tool_agent* client_data::get_gpu_tool_agent(rocprofiler_agent_id_t id) const { for(const auto& itr : gpu_agents) - if(id == itr.agent->id) return &itr; + if(id.handle == itr.agent->handle) return &itr; return nullptr; } @@ -223,6 +239,24 @@ client_data::get_tool_counter_info(rocprofiler_agent_id_t _agent_id, return nullptr; } +inline const rocprofiler_callback_tracing_code_object_load_data_t* +client_data::get_code_object_info(uint64_t code_object_id) const +{ + return code_object_records.rlock( + [code_object_id](const auto& _data) + -> const rocprofiler_callback_tracing_code_object_load_data_t* { + for(const auto& itr : _data) + { + if(code_object_id == itr.payload.code_object_id) + { + return &itr.payload; + break; + } + } + return nullptr; + }); +} + inline constexpr client_data* as_client_data(void* _ptr) { diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp index 7b39501617..76088023af 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp @@ -31,13 +31,13 @@ #include "core/rocpd/data_processor.hpp" #include "core/rocpd/json.hpp" #include "core/state.hpp" +#include "core/trace_cache/cache_manager.hpp" #include "core/utility.hpp" #include "library/components/backtrace.hpp" #include "library/components/backtrace_metrics.hpp" #include "library/components/backtrace_timestamp.hpp" #include "library/components/callchain.hpp" #include "library/perf.hpp" -#include "library/ptl.hpp" #include "library/runtime.hpp" #include "library/thread_data.hpp" #include "library/thread_info.hpp" @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -83,6 +84,7 @@ #include #include #include +#include #include #include @@ -227,46 +229,65 @@ get_data_processor() return rocpd::data_processor::get_instance(); } +template +std::string +get_track_name(const thread_info& _thread_info) +{ + size_t thread_id = _thread_info.index_data->system_value; + size_t sequent_value = _thread_info.index_data->sequent_value; + constexpr auto sample_type = + std::is_same_v ? "Timer" : "Overflow"; + return JOIN(" ", "Thread", sequent_value, sample_type, "(S)", thread_id); +} + void rocpd_initialize_sampling_category() { static bool _is_initialized = false; if(_is_initialized) return; - auto& data_processor = get_data_processor(); - data_processor.insert_category(ROCPROFSYS_CATEGORY_SAMPLING, - trait::name::value); - data_processor.insert_category(ROCPROFSYS_CATEGORY_OVERFLOW_SAMPLING, - trait::name::value); - data_processor.insert_category(ROCPROFSYS_CATEGORY_TIMER_SAMPLING, - trait::name::value); + trace_cache::get_metadata_registry().add_string( + trait::name::value); + trace_cache::get_metadata_registry().add_string( + trait::name::value); + trace_cache::get_metadata_registry().add_string( + trait::name::value); _is_initialized = true; } -size_t +void rocpd_initialize_thread_info(size_t tid) { const auto& _thread_info = thread_info::get(tid, SequentTID); ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", tid); - if(!_thread_info) return -1; + if(!_thread_info) return; - auto& data_processor = get_data_processor(); - auto& n_info = node_info::get_instance(); - - return data_processor.insert_thread_info( - n_info.id, getppid(), getpid(), _thread_info->index_data->system_value, - threading::get_thread_name().c_str(), _thread_info->get_start(), - _thread_info->get_stop(), "{}"); + trace_cache::get_metadata_registry().add_thread_info( + { getppid(), getpid(), + static_cast(_thread_info->index_data->system_value), + static_cast(_thread_info->get_start()), + static_cast(_thread_info->get_stop()), "{}" }); } void -rocpd_init_track(const char* track_name, int64_t tid) +rocpd_init_track(int64_t tid) { - auto& data_processor = get_data_processor(); - auto& n_info = node_info::get_instance(); + const auto& _thread_info = thread_info::get(tid, SequentTID); + ROCPROFSYS_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", tid); + if(!_thread_info) return; - data_processor.insert_track(track_name, n_info.id, getpid(), tid, "{}"); + size_t thread_id = _thread_info->index_data->system_value; + + const auto& _timer_track_name = + get_track_name(*_thread_info); + const auto& _overflow_track_name = + get_track_name(*_thread_info); + + trace_cache::get_metadata_registry().add_track( + { _timer_track_name, thread_id, "{}" }); + trace_cache::get_metadata_registry().add_track( + { _overflow_track_name, thread_id, "{}" }); } template @@ -275,11 +296,12 @@ rocpd_insert_region(size_t thread_id, size_t start_time, size_t end_time, size_t const char* track, const char* call_stack = "{}", const char* line_info = "{}", const char* extdata = "{}") { - auto& data_processor = get_data_processor(); - auto& n_info = node_info::get_instance(); + auto& data_processor = get_data_processor(); + auto& n_info = node_info::get_instance(); + auto string_primary_key = data_processor.insert_string(trait::name::value); - auto event_id = data_processor.insert_event(category_enum_id::value, 0, 0, - 0, call_stack, line_info, extdata); + auto event_id = data_processor.insert_event(string_primary_key, 0, 0, 0, call_stack, + line_info, extdata); data_processor.insert_region(n_info.id, getpid(), thread_id, start_time, end_time, name_id, event_id); @@ -849,6 +871,9 @@ configure(bool _setup, int64_t _tid) } } } + rocpd_initialize_sampling_category(); + rocpd_initialize_thread_info(_tid); + rocpd_init_track(_tid); *_running = true; sampling::get_sampler_init(_tid)->sample(); @@ -1403,7 +1428,7 @@ post_process_perfetto(int64_t _tid, const std::vector& _tim { auto _ncur = _ncount++; // the begin/end + HW counters will be same for entire call-stack so only - // annotate the top and the bottom functons to keep the data consumption + // annotate the top and the bottom functions to keep the data consumption // low bool _include_common = (_ncur == 0 || _ncur + 1 == itr.m_stack.size()); @@ -1707,14 +1732,14 @@ rocpd_post_process_overflow_data( .first->c_str(); auto main_name_id = data_processor.insert_string(_main_name); - const auto& _track_name = - JOIN(" ", "Thread", _thread_info->index_data->sequent_value, "Overflow", - "(S)", _thread_info->index_data->system_value); + size_t thread_id = _thread_info->index_data->system_value; + + auto thread_primary_key = data_processor.map_thread_id_to_primary_key(thread_id); + const auto _track_name = + get_track_name(*_thread_info); - auto thread_idx = rocpd_initialize_thread_info(_tid); - rocpd_init_track(_track_name.c_str(), thread_idx); rocpd_insert_region( - thread_idx, _beg_ns, _end_ns, main_name_id, _track_name.c_str()); + thread_primary_key, _beg_ns, _end_ns, main_name_id, _track_name.c_str()); for(const auto& itr : _overflow_data) { @@ -1729,7 +1754,7 @@ rocpd_post_process_overflow_data( static_strings.emplace(demangle(iitr.name)).first->c_str(); auto name_id = data_processor.insert_string(_name); rocpd_insert_region( - thread_idx, _beg, _end, name_id, _track_name.c_str(), + thread_primary_key, _beg, _end, name_id, _track_name.c_str(), generate_call_stack_json(iitr).c_str(), generate_line_info_json(iitr).c_str()); } @@ -1738,9 +1763,11 @@ rocpd_post_process_overflow_data( } void -rocpd_post_process_backtrace_metrics(int64_t _tid, - const std::vector& _timer_data) +rocpd_post_process_backtrace_metrics( + [[maybe_unused]] int64_t _tid, + [[maybe_unused]] const std::vector& _timer_data) { +#if ROCPROFSYS_USE_ROCM > 0 auto _valid_metrics = backtrace_metrics::valid_array_t{}; for(const auto& itr : _timer_data) @@ -1752,17 +1779,20 @@ rocpd_post_process_backtrace_metrics(int64_t _ti { ROCPROFSYS_VERBOSE(3 || get_debug_sampling(), "[%li] Post-processing metrics for rocpd...\n", _tid); - backtrace_metrics::init_rocpd(_tid, _valid_metrics); + backtrace_metrics::init_rocpd(_tid, _valid_metrics); // move to setup for(const auto& itr : _timer_data) itr.m_metrics.post_process_rocpd(_tid, 0.5 * (itr.m_beg + itr.m_end)); backtrace_metrics::fini_rocpd(_tid, _valid_metrics); } +#endif } void -rocpd_post_process_timer_data(int64_t _tid, - const std::vector& _timer_data) +rocpd_post_process_timer_data( + [[maybe_unused]] int64_t _tid, + [[maybe_unused]] const std::vector& _timer_data) { +#if ROCPROFSYS_USE_ROCM > 0 auto& data_processor = get_data_processor(); const auto& _thread_info = thread_info::get(_tid, SequentTID); @@ -1776,15 +1806,14 @@ rocpd_post_process_timer_data(int64_t _tid, auto _beg_ns = std::max(_timer_data.front().m_beg, _thread_info->get_start()); auto _end_ns = std::min(_timer_data.back().m_end, _thread_info->get_stop()); - const auto _track_name = - JOIN(" ", "Thread", _thread_info->index_data->sequent_value, "(S)", - _thread_info->index_data->system_value); + const auto _track_name = get_track_name(*_thread_info); + + auto thread_primary_key = data_processor.map_thread_id_to_primary_key( + _thread_info->index_data->system_value); - auto thread_idx = rocpd_initialize_thread_info(_tid); - rocpd_init_track(_track_name.c_str(), thread_idx); const auto main_name_id = data_processor.insert_string("samples [rocprof-sys]"); - rocpd_insert_region(thread_idx, _beg_ns, _end_ns, - main_name_id, _track_name.c_str()); + rocpd_insert_region( + thread_primary_key, _beg_ns, _end_ns, main_name_id, _track_name.c_str()); auto _labels = backtrace_metrics::get_hw_counter_labels(_tid); for(const auto& itr : _timer_data) @@ -1798,7 +1827,7 @@ rocpd_post_process_timer_data(int64_t _tid, { auto _ncur = _ncount++; // the begin/end + HW counters will be same for entire call-stack so only - // annotate the top and the bottom functons to keep the data consumption + // annotate the top and the bottom functions to keep the data consumption // low bool _include_common = (_ncur == 0 || _ncur + 1 == itr.m_stack.size()); @@ -1832,8 +1861,9 @@ rocpd_post_process_timer_data(int64_t _tid, inlined_call_stack->set("inlined", "true"); rocpd_insert_region( - thread_idx, _beg, _end, inlined_name_id, _track_name.c_str(), - inlined_call_stack->to_string().c_str(), "{}", + thread_primary_key, _beg, _end, inlined_name_id, + _track_name.c_str(), inlined_call_stack->to_string().c_str(), + "{}", // Only include HW counters for first inlined function (_n == 0) ? hw_counter_json.c_str() : "{}"); } @@ -1843,22 +1873,28 @@ rocpd_post_process_timer_data(int64_t _tid, const auto* _name = static_strings.emplace(iitr.name).first->c_str(); const auto name_id = data_processor.insert_string(_name); rocpd_insert_region( - thread_idx, _beg, _end, name_id, _track_name.c_str(), + thread_primary_key, _beg, _end, name_id, _track_name.c_str(), generate_call_stack_json(iitr).c_str(), generate_line_info_json(iitr).c_str(), hw_counter_json.c_str()); } } } } +#endif } void post_process_rocpd(int64_t _tid, const std::vector& _timer_data, const std::vector& _overflow_data) { - rocpd_initialize_sampling_category(); +#if ROCPROFSYS_USE_ROCM > 0 rocpd_post_process_overflow_data(_tid, _overflow_data); rocpd_post_process_timer_data(_tid, _timer_data); +#else + (void) _tid; + (void) _timer_data; + (void) _overflow_data; +#endif } struct sampling_initialization