diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/CMakeLists.txt b/projects/rocprofiler-systems/source/lib/core/trace_cache/CMakeLists.txt index 28ef1eccaf..bcab341a06 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/CMakeLists.txt +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/CMakeLists.txt @@ -22,23 +22,27 @@ set(trace_cache_sources ${CMAKE_CURRENT_LIST_DIR}/cache_manager.cpp - ${CMAKE_CURRENT_LIST_DIR}/storage_parser.cpp ${CMAKE_CURRENT_LIST_DIR}/buffer_storage.cpp ${CMAKE_CURRENT_LIST_DIR}/metadata_registry.cpp - ${CMAKE_CURRENT_LIST_DIR}/rocpd_post_processing.cpp + ${CMAKE_CURRENT_LIST_DIR}/rocpd_processor.cpp ) set(trace_cache_headers ${CMAKE_CURRENT_LIST_DIR}/cache_manager.hpp ${CMAKE_CURRENT_LIST_DIR}/storage_parser.hpp ${CMAKE_CURRENT_LIST_DIR}/buffer_storage.hpp - ${CMAKE_CURRENT_LIST_DIR}/cache_utility.hpp + ${CMAKE_CURRENT_LIST_DIR}/cacheable.hpp + ${CMAKE_CURRENT_LIST_DIR}/type_registry.hpp + ${CMAKE_CURRENT_LIST_DIR}/cache_type_traits.hpp ${CMAKE_CURRENT_LIST_DIR}/metadata_registry.hpp - ${CMAKE_CURRENT_LIST_DIR}/rocpd_post_processing.hpp + ${CMAKE_CURRENT_LIST_DIR}/rocpd_processor.hpp + ${CMAKE_CURRENT_LIST_DIR}/sample_processor.hpp ${CMAKE_CURRENT_LIST_DIR}/sample_type.hpp ) -target_sources( +target_sources(rocprofiler-systems-core-library PRIVATE ${trace_cache_sources}) + +target_include_directories( rocprofiler-systems-core-library - PRIVATE ${trace_cache_sources} ${trace_cache_headers} + PUBLIC ${CMAKE_CURRENT_LIST_DIR} ) diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.cpp index 9239457891..74cc0e47f1 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.cpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.cpp @@ -40,162 +40,89 @@ namespace rocprofsys namespace trace_cache { -namespace -{ -constexpr auto CACHE_FILE_FLUSH_TIMEOUT = 10ms; -constexpr auto NUM_OF_THREADS = 1; -} // namespace +flush_worker_t::flush_worker_t(worker_function_t worker_function, + worker_synchronization_ptr_t worker_synchronization_ptr, + std::string filepath) -buffer_storage::buffer_storage() -{ - ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false); - m_thread_pool = std::make_unique(NUM_OF_THREADS); - m_thread_pool->initialize_threadpool(NUM_OF_THREADS); - - m_task_group = std::make_unique>(m_thread_pool.get()); -} +: m_worker_function(std::move(worker_function)) +, m_worker_synchronization(std::move(worker_synchronization_ptr)) +, m_filepath(std::move(filepath)) +{} void -buffer_storage::start_flushing_thread(pid_t _pid) +flush_worker_t::start(const pid_t& current_pid) { + if(m_worker_synchronization->is_running) + { + std::stringstream _ss; + _ss << "Flush worker is already running"; + throw std::runtime_error(_ss.str()); + } + + m_ofs = std::ofstream{ m_filepath, std::ios::binary | std::ios::out }; + + if(!m_ofs.good()) + { + std::stringstream _ss; + _ss << "Error opening file for writing: " << m_filepath; + throw std::runtime_error(_ss.str()); + } + + m_worker_synchronization->origin_pid = current_pid; + m_worker_synchronization->is_running = true; + ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false); - m_task_group->exec([this, _pid]() { - auto filepath = get_buffered_storage_filename(get_root_process_id(), getpid()); - std::ofstream _ofs(filepath, std::ios::binary | std::ios::out); - if(!_ofs) - { - std::stringstream _ss; - _ss << "Error opening file for writing: " << filepath; - throw std::runtime_error(_ss.str()); - } - - auto execute_flush = [&](std::ofstream& ofs, bool force = false) { - size_t _head, _tail; - { - std::lock_guard guard{ m_mutex }; - _head = m_head; - _tail = m_tail; - - if(_head == _tail) - { - return; - } - - auto used_space = - m_head > m_tail ? (m_head - m_tail) : (buffer_size - m_tail + m_head); - if(!force && used_space < flush_threshold) - { - return; - } - m_tail = m_head; - } - - if(_head > _tail) - { - ofs.write(reinterpret_cast(m_buffer->data() + _tail), - _head - _tail); - } - else - { - ofs.write(reinterpret_cast(m_buffer->data() + _tail), - buffer_size - _tail); - ofs.write(reinterpret_cast(m_buffer->data()), _head); - } - }; - - m_created_process = _pid; + m_flushing_thread = std::make_unique([&]() { std::mutex _shutdown_condition_mutex; - while(m_running) + while(m_worker_synchronization->is_running) { - execute_flush(_ofs); + m_worker_function(m_ofs, false); std::unique_lock _lock{ _shutdown_condition_mutex }; - m_shutdown_condition.wait_for( - _lock, std::chrono::milliseconds(CACHE_FILE_FLUSH_TIMEOUT), - [&]() { return !m_running; }); + m_worker_synchronization->is_running_condition.wait_for( + _lock, CACHE_FILE_FLUSH_TIMEOUT, + [&]() { return !m_worker_synchronization->is_running; }); } - execute_flush(_ofs, true); - _ofs.close(); - m_exit_finished = true; - m_exit_condition.notify_one(); + m_worker_function(m_ofs, true); + m_ofs.close(); + m_worker_synchronization->exit_finished = true; + m_worker_synchronization->exit_finished_condition.notify_one(); }); } - -buffer_storage::~buffer_storage() -{ - shutdown(); - if(m_thread_pool && m_thread_pool->is_alive()) - { - m_thread_pool->destroy_threadpool(); - } -} - void -buffer_storage::shutdown() +flush_worker_t::stop(const pid_t& current_pid) { - if(!m_running) + const bool flushing_thread_exist = m_flushing_thread != nullptr; + const bool worker_is_running = + m_worker_synchronization != nullptr && m_worker_synchronization->is_running; + + if(flushing_thread_exist && worker_is_running) { - return; - } + ROCPROFSYS_DEBUG("Buffer storage shutting down..\n"); + m_worker_synchronization->is_running = false; + m_worker_synchronization->is_running_condition.notify_all(); - ROCPROFSYS_DEBUG("Buffer storage shutting down.."); - m_running = false; - m_shutdown_condition.notify_all(); - - if(m_created_process != getpid()) - { - ROCPROFSYS_DEBUG( - "Buffer storage is not created in same process as shutting down.."); - return; - } - - std::mutex _exit_mutex; - std::unique_lock _exit_lock{ _exit_mutex }; - m_exit_condition.wait(_exit_lock, [&]() { return m_exit_finished; }); - - if(m_thread_pool && m_thread_pool->is_alive()) - { - m_thread_pool->destroy_threadpool(); - } -} - -void -buffer_storage::fragment_memory() -{ - auto* _data = m_buffer->data(); - memset(_data + m_head, 0xFFFF, buffer_size - m_head); - *reinterpret_cast(_data + m_head) = entry_type::fragmented_space; - - size_t remaining_bytes = buffer_size - m_head - minimal_fragmented_memory_size; - *reinterpret_cast(_data + m_head + sizeof(entry_type)) = remaining_bytes; - m_head = 0; -} - -uint8_t* -buffer_storage::reserve_memory_space(size_t len) -{ - size_t _size; - { - std::lock_guard scope{ m_mutex }; - - if((m_head + len + minimal_fragmented_memory_size) > buffer_size) + const bool thread_is_created_in_this_process = + current_pid == m_worker_synchronization->origin_pid; + if(!thread_is_created_in_this_process) { - fragment_memory(); + ROCPROFSYS_DEBUG( + "Buffer storage is not created in same process as shutting down..\n"); + return; + } + + std::mutex _exit_mutex; + std::unique_lock _exit_lock{ _exit_mutex }; + m_worker_synchronization->exit_finished_condition.wait( + _exit_lock, [&]() { return m_worker_synchronization->exit_finished.load(); }); + + if(m_flushing_thread->joinable()) + { + m_flushing_thread->join(); + m_flushing_thread.reset(); } - _size = m_head; - m_head = m_head + len; } - - auto* _result = m_buffer->data() + _size; - memset(_result, 0, len); - return _result; -} - -bool -buffer_storage::is_running() const -{ - return m_running; } } // namespace trace_cache diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.hpp index c72a6313a7..78dcebc656 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.hpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/buffer_storage.hpp @@ -22,21 +22,21 @@ #pragma once -#include "PTL/TaskGroup.hh" -#include "PTL/ThreadPool.hh" -#include "cache_utility.hpp" -#include "sample_type.hpp" -#include +#include "core/trace_cache/cacheable.hpp" + +#include "common/defines.h" +#include "core/debug.hpp" + #include #include +#include #include #include #include #include #include -#include -#include #include + #include namespace rocprofsys @@ -44,134 +44,226 @@ namespace rocprofsys namespace trace_cache { -class cache_manager; +using ofs_t = std::basic_ostream; +using worker_function_t = std::function; + +struct worker_synchronization_t +{ + std::condition_variable is_running_condition; + std::atomic_bool is_running{ false }; + + std::condition_variable exit_finished_condition; + std::atomic_bool exit_finished{ false }; + + pid_t origin_pid; +}; +using worker_synchronization_ptr_t = std::shared_ptr; + +struct flush_worker_t +{ + explicit flush_worker_t(worker_function_t worker_function, + worker_synchronization_ptr_t worker_synchronization_ptr, + std::string filepath); + + void start(const pid_t& current_pid); + + void stop(const pid_t& current_pid); + +private: + worker_function_t m_worker_function; + worker_synchronization_ptr_t m_worker_synchronization; + std::string m_filepath; + std::ofstream m_ofs; + std::unique_ptr m_flushing_thread; +}; + +struct flush_worker_factory_t +{ + using worker_t = flush_worker_t; + + flush_worker_factory_t() = delete; + flush_worker_factory_t(flush_worker_factory_t&) = delete; + flush_worker_factory_t& operator=(flush_worker_factory_t&) = delete; + flush_worker_factory_t(flush_worker_factory_t&&) = delete; + flush_worker_factory_t& operator=(flush_worker_factory_t&&) = delete; + + static std::shared_ptr get_worker( + worker_function_t worker_function, + const worker_synchronization_ptr_t& worker_synchronization_ptr, + std::string filepath) + { + return std::make_shared(worker_function, worker_synchronization_ptr, + std::move(filepath)); + } +}; + +template class buffer_storage { -public: - static buffer_storage& get_instance(); + static_assert(type_traits::is_enum_class_v, + "TypeIdentifierEnum must be an enum class"); - template - void store(entry_type type, T&&... values) +public: + explicit buffer_storage(std::string filepath) + : m_worker{ std::move( + WorkerFactory::get_worker([this](ofs_t& ofs, bool force) { flush(ofs, force); }, + m_worker_synchronization, std::move(filepath))) } + {} + + ~buffer_storage() { shutdown(); } + + void start(const pid_t& current_pid = getpid()) { - if(!is_running()) + if(m_worker == nullptr) { throw std::runtime_error( - "Trying to use buffered storage while it is not running"); + "Worker is null - unable to start buffered storage."); + } + + if(is_running()) + { return; } - constexpr bool is_supported_type = (supported_types::is_supported && ...); - static_assert(is_supported_type, - "Supported types are const char*, char*, " - "unsigned long, unsigned int, long, unsigned " - "char, std::vector, double, and int."); - - auto arg_size = get_size(values...); - auto total_size = arg_size + sizeof(type) + sizeof(size_t); - auto* reserved_memory = reserve_memory_space(total_size); - size_t position = 0; - - auto store_value = [&](const auto& val) { - using Type = decltype(val); - size_t len = 0; - auto* dest = reserved_memory + position; - if constexpr(std::is_same_v, const char*>) - { - len = strlen(val) + 1; - std::memcpy(dest, val, len); - } - else if constexpr(std::is_same_v, std::vector>) - { - size_t elem_count = val.size(); - len = elem_count + sizeof(size_t); - std::memcpy(dest, &elem_count, sizeof(size_t)); - std::memcpy(dest + sizeof(size_t), val.data(), val.size()); - } - else - { - using ClearType = std::decay_t; - len = sizeof(ClearType); - *reinterpret_cast(dest) = val; - } - position += len; - }; - - store_value(type); - store_value(arg_size); - - (store_value(values), ...); + m_worker->start(current_pid); } - void start_flushing_thread(pid_t pid); - ~buffer_storage(); + void shutdown(const pid_t& current_pid = getpid()) + { + if(m_worker == nullptr) + { + throw std::runtime_error( + "Worker is null - unable to shutdown buffered storage."); + return; + } + + if(!is_running()) + { + return; + } + + m_worker->stop(current_pid); + } + + template + auto store(const Type& value) + { + if(m_worker == nullptr || !is_running()) + { + throw std::runtime_error( + "Trying to use buffered storage while it is not running"); + } + + type_traits::check_type(); + + using TypeIdentifierEnumUderlayingType = + std::underlying_type_t; + + size_t sample_size = get_size(value); + size_t bytes_to_reserve = header_size + sample_size; + auto* buf = reserve_memory_space(bytes_to_reserve); + size_t position = 0; + auto type_identifier_value = + static_cast(Type::type_identifier); + + utility::store_value(type_identifier_value, buf, position); + utility::store_value(sample_size, buf, position); + serialize(buf + position, value); + } + + ROCPROFSYS_INLINE bool is_running() const + { + return m_worker_synchronization != nullptr && + m_worker_synchronization->is_running; + } private: - friend class cache_manager; - buffer_storage(); - void shutdown(); - bool is_running() const; - void fragment_memory(); - uint8_t* reserve_memory_space(size_t len); - - template - struct typelist + void flush(ofs_t& ofs, bool force) { - template - constexpr static bool is_supported = - (std::is_same_v, Types> || ...); - }; - - using supported_types = typelist, uint8_t, int64_t, double>; - - template - static constexpr bool is_string_literal_v = - std::is_same_v, const char*> || - std::is_same_v, char*>; - - template - constexpr size_t get_size_impl(T&& val) - { - if constexpr(is_string_literal_v) + size_t _head, _tail; { - size_t size = 0; - while(val[size] != '\0') + std::lock_guard guard{ m_mutex }; + _head = m_head; + _tail = m_tail; + + if(_head == _tail) { - size++; + return; } - return ++size; + + auto used_space = + m_head > m_tail ? (m_head - m_tail) : (buffer_size - m_tail + m_head); + if(!force && used_space < flush_threshold) + { + return; + } + m_tail = m_head; } - else if constexpr(std::is_same_v, std::vector>) + + if(_head > _tail) { - return val.size() + sizeof(size_t); + ofs.write(reinterpret_cast(m_buffer->data() + _tail), + _head - _tail); } else { - return sizeof(T); + ofs.write(reinterpret_cast(m_buffer->data() + _tail), + buffer_size - _tail); + ofs.write(reinterpret_cast(m_buffer->data()), _head); + } + if(ofs.fail()) + { + ROCPROFSYS_WARNING(1, "Error flushing buffered storage to file for pid: %d", + m_worker_synchronization->origin_pid); + ROCPROFSYS_CI_THROW(true, + "Error flushing buffered storage to file for pid: %d", + m_worker_synchronization->origin_pid); } } - template - constexpr size_t get_size(T&&... val) + void fragment_memory() { - auto total_size = 0; - ((total_size += get_size_impl(val)), ...); - return total_size; + auto* _data = m_buffer->data(); + memset(_data + m_head, std::numeric_limits::max(), buffer_size - m_head); + *reinterpret_cast(_data + m_head) = + TypeIdentifierEnum::fragmented_space; + + size_t remaining_bytes = buffer_size - m_head - header_size; + *reinterpret_cast(_data + m_head + sizeof(TypeIdentifierEnum)) = + remaining_bytes; + m_head = 0; + } + + ROCPROFSYS_INLINE uint8_t* reserve_memory_space(const size_t& number_of_bytes) + { + size_t _size; + { + std::lock_guard scope{ m_mutex }; + + if(__builtin_expect((m_head + number_of_bytes + + header_size) > buffer_size, + 0)) + { + fragment_memory(); + } + _size = m_head; + m_head += number_of_bytes; + } + + return m_buffer->data() + _size; } private: - std::mutex m_mutex; - std::condition_variable m_exit_condition; - bool m_exit_finished{ false }; - bool m_running{ true }; - std::condition_variable m_shutdown_condition; + worker_synchronization_ptr_t m_worker_synchronization{ + std::make_shared() + }; - std::unique_ptr m_thread_pool; - std::unique_ptr> m_task_group; - size_t m_head{ 0 }; - size_t m_tail{ 0 }; - std::unique_ptr m_buffer{ std::make_unique() }; - pid_t m_created_process; + std::shared_ptr m_worker; + + std::mutex m_mutex; + size_t m_head{ 0 }; + size_t m_tail{ 0 }; + std::unique_ptr m_buffer{ std::make_unique() }; }; } // namespace trace_cache diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp index 91c58deb33..db0267ad6d 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp @@ -21,14 +21,17 @@ // SOFTWARE. #include "cache_manager.hpp" -#include "agent_manager.hpp" + +#include "core/trace_cache/metadata_registry.hpp" +#include "core/trace_cache/rocpd_processor.hpp" +#include "core/trace_cache/sample_processor.hpp" + +#include "core/agent_manager.hpp" #include "core/config.hpp" -#include "core/trace_cache/storage_parser.hpp" -#include "debug.hpp" +#include "core/debug.hpp" + #include "library/runtime.hpp" -#include "trace_cache/cache_utility.hpp" -#include "trace_cache/metadata_registry.hpp" -#include "trace_cache/rocpd_post_processing.hpp" + #include #include #include @@ -37,8 +40,81 @@ namespace rocprofsys { namespace trace_cache { -namespace + +namespace data { +struct cache_files_t +{ + std::string buff_storage; + std::string metadata; + inline bool empty() const { return buff_storage.empty() || metadata.empty(); } +}; + +struct enabled_formats_t +{ + bool rocpd = get_use_rocpd(); + + void print() const + { + constexpr std::pair formats[] = { + { "rocpd", &enabled_formats_t::rocpd }, + }; + + bool any_enabled = false; + for(const auto& fmt : formats) + any_enabled |= this->*(fmt.second); + + if(!any_enabled) return; + + bool first = true; + std::stringstream ss; + + for(const auto& fmt : formats) + { + if(this->*(fmt.second)) + { + if(!first && sizeof(formats) > 1) ss << ", "; + ss << fmt.first; + first = false; + } + } + ROCPROFSYS_PRINT( + "Generating [%s] format(s) with collected data from trace cache. This may " + "take a while..\n", + ss.str().c_str()); + } +}; + +struct processor_config_t +{ + processor_config_t(pid_t pid, pid_t ppid, + std::shared_ptr metadata_registry_ptr, + std::shared_ptr agent_manager_ptr) + : _pid(pid) + , _ppid(ppid) + , _metadata_registry(std::move(metadata_registry_ptr)) + , _agent_manager(std::move(agent_manager_ptr)) + {} + + pid_t _pid; + pid_t _ppid; + + std::shared_ptr _metadata_registry; + std::shared_ptr _agent_manager; +}; + +struct processor_storage_t +{ + std::shared_ptr rocpd_processor{ nullptr }; +}; + +using directory_files_t = std::vector; +using mapped_cache_files_t = std::map; +} // namespace data + +namespace filesystem_utils +{ + void remove_if_exists(const std::string& fname) { @@ -64,19 +140,29 @@ remove_if_exists(const std::string& fname) } } -std::vector -list_dir_files(const std::string& path) +data::directory_files_t +list_dir_files(const std::string& _path) { - DIR* dir = opendir(path.c_str()); - if(dir == nullptr) + if(_path.empty()) { - ROCPROFSYS_THROW("Error opening directory: %s", path.c_str()); + return {}; } - std::vector result{}; - dirent* entry; + auto dir_deleter = [](DIR* d) { + if(d) closedir(d); + }; - while((entry = readdir(dir)) != nullptr) + std::unique_ptr dir(opendir(_path.c_str()), dir_deleter); + + if(!dir) + { + ROCPROFSYS_THROW("Error opening directory: %s", _path.c_str()); + } + + data::directory_files_t result{}; + dirent* entry; + + while((entry = readdir(dir.get())) != nullptr) { if(std::string(entry->d_name) != "." && std::string(entry->d_name) != "..") { @@ -84,25 +170,26 @@ list_dir_files(const std::string& path) } } - closedir(dir); return result; } -struct cache_files +data::mapped_cache_files_t +get_cache_files(const pid_t& root_pid, + const data::directory_files_t& _files_from_temp_directory) { - std::string buff_storage; - std::string metadata; -}; + if(_files_from_temp_directory.empty()) + { + return {}; + } -std::map -get_cache_files() -{ - const auto root_pid = get_root_process_id(); - const auto tmp_files = list_dir_files("/tmp/"); - - std::map cache_map{}; + data::mapped_cache_files_t cache_map{}; auto parse_and_fill_cache = [&](const std::string& filename) { + if(filename.empty()) + { + return; + } + const std::regex buff_regex(R"(buffered_storage_(\d+)_(\d+)\.bin)"); const std::regex meta_regex(R"(metadata_(\d+)_(\d+)\.json)"); std::smatch match; @@ -113,7 +200,7 @@ get_cache_files() int pid = std::stoi(match[2]); if(parent_pid == root_pid) { - cache_map[pid].buff_storage = "/tmp/" + filename; + cache_map[pid].buff_storage = trace_cache::tmp_directory + filename; } } else if(std::regex_match(filename, match, meta_regex)) @@ -122,16 +209,119 @@ get_cache_files() int pid = std::stoi(match[2]); if(parent_pid == root_pid) { - cache_map[pid].metadata = "/tmp/" + filename; + cache_map[pid].metadata = trace_cache::tmp_directory + filename; } } }; - std::for_each(tmp_files.begin(), tmp_files.end(), parse_and_fill_cache); + std::for_each(_files_from_temp_directory.begin(), _files_from_temp_directory.end(), + parse_and_fill_cache); return cache_map; } -} // namespace +void +clear_cache_files(const data::mapped_cache_files_t& _cache_files) +{ + ROCPROFSYS_PRINT("Removing cached temporary files...\n"); + for(const auto& [_, files] : _cache_files) + { + ROCPROFSYS_DEBUG("Removing cached temporary file: %s\n", + files.buff_storage.c_str()); + filesystem_utils::remove_if_exists(files.buff_storage); + + ROCPROFSYS_DEBUG("Removing cached temporary file: %s\n", files.metadata.c_str()); + filesystem_utils::remove_if_exists(files.metadata); + } +} + +} // namespace filesystem_utils + +namespace processing_utils +{ +[[nodiscard]] data::processor_storage_t +configure_processors(const std::shared_ptr& _type_processing, + const std::shared_ptr& _processor_config, + const data::enabled_formats_t& _enabled_formats) +{ + data::processor_storage_t processor_storage; + if(_enabled_formats.rocpd) + { + processor_storage.rocpd_processor = std::make_shared( + _processor_config->_metadata_registry, _processor_config->_agent_manager, + _processor_config->_pid, _processor_config->_ppid); + _type_processing->add_handler(*processor_storage.rocpd_processor); + } + return processor_storage; +} + +void +process_buffered_storage( + const std::shared_ptr& _processor_config, + const std::string& _storage_filename, const data::enabled_formats_t& _enabled_formats) +{ + auto _processor_coordinator = std::make_shared(); + auto processor_storage = + configure_processors(_processor_coordinator, _processor_config, _enabled_formats); + storage_parser_t _parser(_storage_filename); + + _processor_coordinator->prepare_for_processing(); + _parser.load(_processor_coordinator); + _processor_coordinator->finalize_processing(); +} + +std::vector> +create_processor_configs(const data::mapped_cache_files_t& _cache_files, + const pid_t& _root_pid) +{ + constexpr size_t ROOT_PROCESS_INCREMENT{ 1 }; + + std::vector> processor_configs; + processor_configs.reserve(_cache_files.size() + ROOT_PROCESS_INCREMENT); + + for(const auto& [pid, files] : _cache_files) + { + if(files.empty()) + { + continue; + } + + std::vector> _agents; + auto _metadata = std::make_shared(); + _metadata->load_from_file(files.metadata, _agents); + + auto _agent_manager = std::make_shared(_agents); + + processor_configs.push_back(std::make_shared( + pid, _root_pid, _metadata, _agent_manager)); + } + return processor_configs; +} + +void +multithreaded_processing( + const std::vector>& _processor_configs, + const data::enabled_formats_t& _enabled_formats) +{ + ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false); + + std::vector processing_threads; + processing_threads.reserve(_processor_configs.size()); + for(const auto& processor_config : _processor_configs) + { + processing_threads.emplace_back( + process_buffered_storage, processor_config, + utility::get_buffered_storage_filename(processor_config->_ppid, + processor_config->_pid), + _enabled_formats); + } + + for(auto& thread : processing_threads) + { + thread.join(); + } +} + +} // namespace processing_utils cache_manager& cache_manager::get_instance() @@ -143,90 +333,37 @@ cache_manager::get_instance() void cache_manager::post_process_bulk() { - if(is_root_process()) + if(!is_root_process()) { - if(m_storage.is_running()) - { - ROCPROFSYS_WARNING(2, - "Postprocessing called without previously shutting down " - "cache storage. Calling shutdown explicitly..\n"); - shutdown(); - } - - auto _cache_files = get_cache_files(); - - if(get_use_rocpd()) - { - ROCPROFSYS_PRINT( - "Generating rocpd with collected data. This may take a while..\n"); - - std::vector rocpd_threads; - ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false); - - rocpd_threads.emplace_back([this]() { - auto pid = getpid(); - auto ppid = get_root_process_id(); - rocpd_post_processing _post_processing( - m_metadata, get_agent_manager_instance(), pid, ppid); - storage_parser _parser( - get_buffered_storage_filename(get_root_process_id(), getpid())); - _post_processing.register_parser_callback(_parser); - _post_processing.post_process_metadata(); - _parser.consume_storage(); - }); - - for(const auto& [pid, files] : _cache_files) - { - if(!files.buff_storage.empty() && !files.metadata.empty()) - { - rocpd_threads.emplace_back([pid = pid, files = files]() { - ROCPROFSYS_DEBUG( - "Creating database for [%d] from buffered storage " - "file: %s and from metadata file: %s\n", - pid, files.buff_storage.c_str(), files.metadata.c_str()); - - std::vector> _agents; - metadata_registry _metadata; - - auto res = _metadata.load_from_file(files.metadata, _agents); - if(!res) - { - ROCPROFSYS_WARNING(0, - "Load from file for metadata failed: %s\n", - files.metadata.c_str()); - return; - } - - agent_manager _agent_manager{ _agents }; - auto ppid = get_root_process_id(); - rocpd_post_processing _post_processing(_metadata, _agent_manager, - pid, ppid); - storage_parser _parser(files.buff_storage); - _post_processing.register_parser_callback(_parser); - _post_processing.post_process_metadata(); - _parser.consume_storage(); - }); - } - } - - for(auto& thread : rocpd_threads) - { - thread.join(); - } - } - - ROCPROFSYS_PRINT("Removing cached temporary files...\n"); - - for(const auto& [pid, files] : _cache_files) - { - ROCPROFSYS_PRINT("Removing cached temporary file: %s\n", - files.buff_storage.c_str()); - ROCPROFSYS_PRINT("Removing cached temporary file: %s\n", - files.metadata.c_str()); - remove_if_exists(files.buff_storage.c_str()); - remove_if_exists(files.metadata.c_str()); - } + return; } + + if(m_storage.is_running()) + { + ROCPROFSYS_WARNING(2, "Postprocessing called without previously shutting down " + "cache storage. Calling shutdown explicitly..\n"); + shutdown(); + } + + const auto root_pid = get_root_process_id(); + const auto temp_directory_content = + filesystem_utils::list_dir_files(trace_cache::tmp_directory); + + const auto cache_files = + filesystem_utils::get_cache_files(root_pid, temp_directory_content); + const data::enabled_formats_t enabled_formats; + enabled_formats.print(); + + auto processor_configs = + processing_utils::create_processor_configs(cache_files, root_pid); + + processor_configs.push_back(std::make_shared( + getpid(), root_pid, m_metadata, + std::make_shared(get_agent_manager_instance().get_agents()))); + + processing_utils::multithreaded_processing(processor_configs, enabled_formats); + + filesystem_utils::clear_cache_files(cache_files); } void diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.hpp index 86953f0454..7091272943 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.hpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.hpp @@ -22,30 +22,43 @@ #pragma once -#include "buffer_storage.hpp" -#include "metadata_registry.hpp" -#include "storage_parser.hpp" +#include "core/trace_cache/buffer_storage.hpp" +#include "core/trace_cache/metadata_registry.hpp" +#include "core/trace_cache/sample_type.hpp" +#include "core/trace_cache/storage_parser.hpp" +#include namespace rocprofsys { namespace trace_cache { +using storage_parser_t = + storage_parser; + +using buffer_storage_t = buffer_storage; + class cache_manager { public: static cache_manager& get_instance(); - buffer_storage& get_buffer_storage() { return m_storage; } - metadata_registry& get_metadata_registry() { return m_metadata; } + buffer_storage_t& get_buffer_storage() { return m_storage; } + metadata_registry& get_metadata_registry() { return *m_metadata; } void shutdown(); void post_process_bulk(); private: - void post_process_metadata(); cache_manager() = default; - buffer_storage m_storage; - metadata_registry m_metadata; + buffer_storage_t m_storage{ utility::get_buffered_storage_filename( + get_root_process_id(), getpid()) }; + + std::shared_ptr m_metadata{ + std::make_shared() + }; }; inline metadata_registry& @@ -54,7 +67,7 @@ get_metadata_registry() return cache_manager::get_instance().get_metadata_registry(); } -inline buffer_storage& +inline buffer_storage_t& get_buffer_storage() { return cache_manager::get_instance().get_buffer_storage(); diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_type_traits.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_type_traits.hpp new file mode 100644 index 0000000000..405e3cc363 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_type_traits.hpp @@ -0,0 +1,173 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once +#include +#include +#include +#include +#include +#include + +namespace rocprofsys +{ +namespace trace_cache +{ + +namespace type_traits +{ + +template +struct always_false : std::false_type +{}; + +} // namespace type_traits + +template +void +serialize(uint8_t*, const T&) +{ + static_assert(type_traits::always_false::value, "serialize not specialized"); +} + +template +T +deserialize(uint8_t*&) +{ + static_assert(type_traits::always_false::value, "deserialize not specialized"); + return T{}; +} + +template +size_t +get_size(const T&) +{ + static_assert(type_traits::always_false::value, "get_size(T) not specialized"); + return 0; +} + +namespace type_traits +{ + +template +struct tuple_to_variant; + +template +struct tuple_to_variant> +{ + using type = std::variant; +}; + +template +using void_t = void; + +template +struct typelist +{ + template + constexpr static bool is_supported = (std::is_same_v, Types> || ...); +}; + +using supported_types = typelist, uint8_t, int64_t, double>; + +template +static constexpr bool is_string_view_v = + std::is_same_v, std::string_view>; + +template +struct is_enum_class +: std::bool_constant && + !std::is_convertible_v>> +{}; + +template +inline constexpr bool is_enum_class_v = is_enum_class::value; + +template +struct has_type_identifier : std::false_type +{}; + +template +struct has_type_identifier> +: std::bool_constant< + is_enum_class_v && + std::is_convertible_v> +{}; + +template +struct has_serialize : std::false_type +{}; + +template +struct has_serialize(), + std::declval()))>> +: std::true_type +{}; + +template +struct has_deserialize : std::false_type +{}; + +template +struct has_deserialize< + T, void_t(std::declval())), T>>> +: std::true_type +{}; + +template +struct has_get_size : std::false_type +{}; + +template +struct has_get_size()))>> +: std::true_type +{}; + +template +__attribute__((always_inline)) inline constexpr void +check_type() +{ + static_assert(has_serialize::value, "Type doesn't have `serialize` function."); + static_assert(has_deserialize::value, "Type doesn't have `deserialize` function."); + static_assert(has_get_size::value, "Type doesn't have `get_size` function."); + static_assert(has_type_identifier::value, + "Type doesn't have `type_identifier` member with correct type."); +} + +template +struct has_execute_processing : std::false_type +{}; + +template +struct has_execute_processing< + T, TypeIdentifierEnum, CacheableType, + void_t().execute_sample_processing( + std::declval(), std::declval()))>> +: std::bool_constant().execute_sample_processing( + std::declval(), std::declval()))>> +{}; + +} // namespace type_traits +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/cacheable.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/cacheable.hpp new file mode 100644 index 0000000000..0fef2bfdd3 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/cacheable.hpp @@ -0,0 +1,176 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once +#include "core/trace_cache/cache_type_traits.hpp" +#include "library/runtime.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std::chrono_literals; + +namespace rocprofsys +{ + +namespace trace_cache +{ + +struct cacheable_t +{ + cacheable_t() = default; +}; + +constexpr size_t MByte = 1024 * 1024; +constexpr size_t buffer_size = 100 * MByte; +constexpr size_t flush_threshold = 80 * MByte; +constexpr auto CACHE_FILE_FLUSH_TIMEOUT = 10ms; + +constexpr auto ABSOLUTE = "ABS"; +constexpr auto PERCENTAGE = "%"; + +template +constexpr size_t header_size = sizeof(TypeIdentifierEnum) + sizeof(size_t); +using buffer_array_t = std::array; + +const auto tmp_directory = std::string{ "/tmp/" }; + +namespace utility +{ + +const auto get_buffered_storage_filename = [](const int& ppid, const int& pid) { + return std::string{ tmp_directory + "buffered_storage_" + std::to_string(ppid) + "_" + + std::to_string(pid) + ".bin" }; +}; + +const auto get_metadata_filepath = [](const int& ppid, const int& pid) { + return std::string{ tmp_directory + "metadata_" + std::to_string(ppid) + "_" + + std::to_string(pid) + ".json" }; +}; + +template +__attribute__((always_inline)) inline constexpr size_t +get_size(Type&& val) +{ + using DecayedType = std::decay_t; + static_assert(type_traits::supported_types::is_supported, + "Unsupported type in get_size"); + + if constexpr(type_traits::is_string_view_v || + std::is_same_v>) + { + return val.size() + sizeof(size_t); + } + else + { + return sizeof(DecayedType); + } +} + +template +__attribute__((always_inline)) inline constexpr size_t +get_size(Type&& val, Types&&... vals) +{ + return get_size(std::forward(val)) + get_size(std::forward(vals)...); +} + +template +__attribute__((always_inline)) inline void +store_value(const Type& value, uint8_t* buffer, size_t& position) +{ + using DecayedType = std::decay_t; + static_assert(type_traits::supported_types::is_supported, + "Unsupported type in store_value"); + + auto* dest = buffer + position; + + if constexpr(type_traits::is_string_view_v || + std::is_same_v>) + { + const size_t elem_count = value.size(); + *reinterpret_cast(dest) = elem_count; + std::memcpy(dest + sizeof(size_t), value.data(), elem_count); + position += elem_count + sizeof(size_t); + } + else + { + *reinterpret_cast(dest) = value; + position += sizeof(DecayedType); + } +} + +template +__attribute__((always_inline)) inline void +store_value(uint8_t* buffer, const Types&... values) +{ + size_t position = 0; + (store_value(values, buffer, position), ...); +} + +template +__attribute__((always_inline)) inline static void +parse_value(uint8_t*& data_pos, Type& arg) +{ + using DecayedType = std::decay_t; + static_assert(type_traits::supported_types::is_supported, + "Unsupported type in parse_value"); + + if constexpr(type_traits::is_string_view_v) + { + const size_t string_size = *reinterpret_cast(data_pos); + data_pos += sizeof(size_t); + arg = std::string_view{ reinterpret_cast(data_pos), string_size }; + data_pos += string_size; + } + else if constexpr(std::is_same_v>) + { + const size_t vector_size = *reinterpret_cast(data_pos); + data_pos += sizeof(size_t); + arg.reserve(vector_size); + std::copy_n(data_pos, vector_size, std::back_inserter(arg)); + data_pos += vector_size; + } + else + { + arg = *reinterpret_cast(data_pos); + data_pos += sizeof(DecayedType); + } +} + +template +__attribute__((always_inline)) inline static void +parse_value(uint8_t*& data_pos, Type& arg, Types&... args) +{ + parse_value(data_pos, arg); + parse_value(data_pos, args...); +} + +} // namespace utility +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.cpp index 9eed5c9d5e..749c332dd7 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.cpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.cpp @@ -38,6 +38,23 @@ namespace trace_cache namespace { +class thread_local_string_pool +{ +public: + const char* store(std::string_view str) + { + auto [it, inserted] = m_strings.emplace(str); + return it->c_str(); + } + + void clear() { m_strings.clear(); } + +private: + std::set m_strings; +}; + +thread_local thread_local_string_pool g_string_pool; + template std::optional get_type_info(const DataType& data, const Filter& filter) @@ -215,12 +232,11 @@ from_json_code_object(const nlohmann::json& _json) rocprofiler_callback_tracing_code_object_load_data_t co = {}; co.code_object_id = _json["code_object_id"].get(); auto uri_str = _json["uri"].get(); - co.uri = new char[uri_str.size() + 1]; - strncpy(const_cast(co.uri), uri_str.c_str(), uri_str.size() + 1); - co.load_base = _json["load_base"].get(); - co.load_size = _json["load_size"].get(); - co.load_delta = _json["load_delta"].get(); - co.storage_type = static_cast( + co.uri = g_string_pool.store(uri_str); + co.load_base = _json["load_base"].get(); + co.load_size = _json["load_size"].get(); + co.load_delta = _json["load_delta"].get(); + co.storage_type = static_cast( _json["storage_type"].get()); auto handle = _json["agent_id_handle"].get(); # if(ROCPROFILER_VERSION >= 600) @@ -255,12 +271,10 @@ rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t from_json_kernel_symbol(const nlohmann::json& _json) { rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t ks = {}; - ks.kernel_id = _json["kernel_id"].get(); - ks.code_object_id = _json["code_object_id"].get(); - auto kernel_name_str = _json["kernel_name"].get(); - ks.kernel_name = new char[kernel_name_str.size() + 1]; - strncpy(const_cast(ks.kernel_name), kernel_name_str.c_str(), - sizeof(ks.kernel_name) + 1); + ks.kernel_id = _json["kernel_id"].get(); + ks.code_object_id = _json["code_object_id"].get(); + auto kernel_name_str = _json["kernel_name"].get(); + ks.kernel_name = g_string_pool.store(kernel_name_str); ks.kernel_object = _json["kernel_object"].get(); ks.kernarg_segment_size = _json["kernarg_segment_size"].get(); ks.kernarg_segment_alignment = _json["kernarg_segment_alignment"].get(); @@ -843,6 +857,7 @@ metadata_registry::save_to_file(const std::string& filep std::ofstream file(filepath); if(!file.is_open()) { + ROCPROFSYS_WARNING(1, "Error opening file for writing: %s", filepath.c_str()); return false; } @@ -851,6 +866,7 @@ metadata_registry::save_to_file(const std::string& filep return true; } catch(const std::exception& e) { + ROCPROFSYS_WARNING(1, "Error saving metadata to file: %s", e.what()); return false; } } @@ -864,6 +880,7 @@ metadata_registry::load_from_file(const std::string& filepath, std::ifstream file(filepath); if(!file.is_open()) { + ROCPROFSYS_WARNING(1, "Error opening file for reading: %s", filepath.c_str()); return false; } @@ -875,6 +892,7 @@ metadata_registry::load_from_file(const std::string& filepath, return true; } catch(const std::exception& e) { + ROCPROFSYS_WARNING(1, "Error loading metadata from file: %s", e.what()); return false; } } diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.hpp index 943ea44ad6..62e31bda4b 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.hpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/metadata_registry.hpp @@ -178,9 +178,14 @@ struct kernel_symbol_less } // namespace info -class cache_manager; struct metadata_registry { + metadata_registry(); + metadata_registry(const metadata_registry&) = delete; + metadata_registry& operator=(const metadata_registry&) = delete; + metadata_registry(metadata_registry&&) = delete; + metadata_registry& operator=(metadata_registry&&) = delete; + void set_process(const info::process& process); void add_pmc_info(const info::pmc& pmc_info); void add_thread_info(const info::thread& thread_info); @@ -224,26 +229,24 @@ struct metadata_registry #endif private: - friend class cache_manager; - metadata_registry(); - common::synchronized m_process; + common::synchronized m_process{}; common::synchronized< std::unordered_set> - m_pmc_infos; - common::synchronized> m_threads; - common::synchronized> m_tracks; + m_pmc_infos{}; + common::synchronized> m_threads{}; + common::synchronized> m_tracks{}; - common::synchronized> m_streams; - common::synchronized> m_queues; - common::synchronized> m_strings; + common::synchronized> m_streams{}; + common::synchronized> m_queues{}; + common::synchronized> m_strings{}; #if ROCPROFSYS_USE_ROCM > 0 common::synchronized> - m_code_objects; + m_code_objects{}; common::synchronized< std::set> - m_kernel_symbols; + m_kernel_symbols{}; rocprofiler::sdk::buffer_name_info_t m_buffered_tracing_info{ rocprofiler::sdk::get_buffer_tracing_names() }; diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.cpp deleted file mode 100644 index 9c58c6d466..0000000000 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.cpp +++ /dev/null @@ -1,904 +0,0 @@ -// MIT License -// -// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include "trace_cache/rocpd_post_processing.hpp" -#include "agent_manager.hpp" -#include "config.hpp" -#include "debug.hpp" -#include "gpu_metrics.hpp" -#include "library/thread_info.hpp" -#include "node_info.hpp" -#include "rocpd/data_processor.hpp" -#include "rocpd/data_storage/database.hpp" -#include "trace_cache/metadata_registry.hpp" -#include "trace_cache/sample_type.hpp" -#include "trace_cache/storage_parser.hpp" -#include -#include -#include -#include -#include -#include -#include -#if ROCPROFSYS_USE_ROCM > 0 -# include "library/rocprofiler-sdk/fwd.hpp" -# include -# include -#endif - -namespace rocprofsys -{ -namespace trace_cache -{ -namespace -{ -#if ROCPROFSYS_USE_ROCM > 0 -auto -get_handle_from_code_object( - const rocprofiler_callback_tracing_code_object_load_data_t& code_object) -{ -# if(ROCPROFILER_VERSION >= 600) - return code_object.agent_id.handle; -# else - return code_object.rocp_agent.handle; -# endif -} -#endif -} // namespace - -std::shared_ptr -rocpd_post_processing::get_data_processor() const -{ - return m_data_processor; -} - -postprocessing_callback -rocpd_post_processing::get_kernel_dispatch_callback() const -{ - return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) { -#if ROCPROFSYS_USE_ROCM > 0 - auto _kds = static_cast(parsed); - - auto data_processor = get_data_processor(); - auto& n_info = node_info::get_instance(); - auto process = m_metadata.get_process_info(); - auto agent_primary_key = - m_agent_manager.get_agent_by_handle(_kds.agent_id_handle).base_id; - - auto thread_primary_key = - data_processor->map_thread_id_to_primary_key(_kds.thread_id); - - auto category_id = data_processor->insert_string( - trait::name::value); - - auto kernel_symbol = m_metadata.get_kernel_symbol(_kds.kernel_id); - - if(!kernel_symbol.has_value()) - { - throw std::runtime_error("Kernel symbol is missing for kernel dispatch"); - return; - } - - auto region_name_primary_key = data_processor->insert_string( - tim::demangle(kernel_symbol->kernel_name).c_str()); - - auto stack_id = _kds.correlation_id_internal; - auto parent_stack_id = _kds.correlation_id_ancestor; - auto correlation_id = 0; - - auto event_id = data_processor->insert_event(category_id, stack_id, - parent_stack_id, correlation_id); - - data_processor->insert_kernel_dispatch( - n_info.id, process.pid, thread_primary_key, agent_primary_key, _kds.kernel_id, - _kds.dispatch_id, _kds.queue_id_handle, _kds.stream_handle, - _kds.start_timestamp, _kds.end_timestamp, _kds.private_segment_size, - _kds.group_segment_size, _kds.workgroup_size_x, _kds.workgroup_size_y, - _kds.workgroup_size_z, _kds.grid_size_x, _kds.grid_size_y, _kds.grid_size_z, - region_name_primary_key, event_id); -#endif - }; -} - -postprocessing_callback -rocpd_post_processing::get_memory_copy_callback() const -{ - return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) { -#if ROCPROFSYS_USE_ROCM > 0 - auto _mcs = static_cast(parsed); - - auto data_processor = get_data_processor(); - auto& n_info = node_info::get_instance(); - auto process = m_metadata.get_process_info(); - - auto _name = std::string{ m_metadata.get_buffer_name_info().at( - static_cast(_mcs.kind), - static_cast(_mcs.operation)) }; - auto name_primary_key = data_processor->insert_string(_name.c_str()); - - auto category_primary_key = - data_processor->insert_string(trait::name::value); - - auto thread_primary_key = - data_processor->map_thread_id_to_primary_key(_mcs.thread_id); - - auto dst_agent_primary_key = - m_agent_manager.get_agent_by_handle(_mcs.dst_agent_id_handle).base_id; - auto src_agent_primary_key = - m_agent_manager.get_agent_by_handle(_mcs.src_agent_id_handle).base_id; - - auto stack_id = _mcs.correlation_id_internal; - auto parent_stack_id = _mcs.correlation_id_ancestor; - auto correlation_id = 0; - auto queue_id = 0; - - auto event_primary_key = data_processor->insert_event( - category_primary_key, stack_id, parent_stack_id, correlation_id); - - data_processor->insert_memory_copy( - n_info.id, process.pid, thread_primary_key, _mcs.start_timestamp, - _mcs.end_timestamp, name_primary_key, dst_agent_primary_key, - _mcs.dst_address_value, src_agent_primary_key, _mcs.src_address_value, - _mcs.bytes, queue_id, _mcs.stream_handle, name_primary_key, - event_primary_key); -#endif - }; -} - -#if(ROCPROFSYS_USE_ROCM > 0 && ROCPROFILER_VERSION >= 600) -postprocessing_callback -rocpd_post_processing::get_memory_allocate_callback() const -{ -# if ROCPROFSYS_USE_ROCM > 0 - auto memtype_to_db = - [](std::string_view memory_type) -> std::pair { - constexpr auto MEMORY_PREFIX = std::string_view{ "MEMORY_ALLOCATION_" }; - constexpr auto SCRATCH_PREFIX = std::string_view{ "SCRATCH_MEMORY_" }; - constexpr auto VMEM_PREFIX = std::string_view{ "VMEM_" }; - constexpr auto ASYNC_PREFIX = std::string_view{ "ASYNC_" }; - - std::string _type; - std::string _level; - if(memory_type.find(MEMORY_PREFIX) == 0) - { - _type = memory_type.substr(MEMORY_PREFIX.length()); - if(_type.find(VMEM_PREFIX) == 0) - { - _type = _type.substr(VMEM_PREFIX.length()); - _level = "VIRTUAL"; - } - else - { - _level = "REAL"; - } - } - else if(memory_type.find(SCRATCH_PREFIX) == 0) - { - _type = memory_type.substr(SCRATCH_PREFIX.length()); - _level = "SCRATCH"; - if(memory_type.find(ASYNC_PREFIX) == 0) - { - _type = memory_type.substr(ASYNC_PREFIX.length()); // RECLAIM - } - } - - if(_type == "ALLOCATE") - { - _type = "ALLOC"; - } - - return std::make_pair(_type, _level); - }; -# endif - - return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) { -# if ROCPROFSYS_USE_ROCM > 0 - auto _mas = static_cast(parsed); - auto data_processor = get_data_processor(); - auto& n_info = node_info::get_instance(); - auto process = m_metadata.get_process_info(); - auto thread_primary_key = - data_processor->map_thread_id_to_primary_key(_mas.thread_id); - auto agent_primary_key = std::optional{}; - - const auto invalid_context = ROCPROFILER_CONTEXT_NONE; - if(_mas.agent_id_handle != invalid_context.handle) - { - { - agent_primary_key = - m_agent_manager.get_agent_by_handle(_mas.agent_id_handle).base_id; - } - const auto* _name = m_metadata.get_buffer_name_info().at( - static_cast(_mas.kind), - static_cast(_mas.operation)); - - auto [type, level] = memtype_to_db(_name); - - auto stack_id = _mas.correlation_id_internal; - auto parent_stack_id = _mas.correlation_id_ancestor; - auto correlation_id = 0; - auto queue_id = 0; - - auto category_primary_key = data_processor->insert_string( - trait::name::value); - - auto event_primary_key = data_processor->insert_event( - category_primary_key, stack_id, parent_stack_id, correlation_id); - - data_processor->insert_memory_alloc( - n_info.id, process.pid, thread_primary_key, agent_primary_key, - type.c_str(), level.c_str(), _mas.start_timestamp, _mas.end_timestamp, - _mas.address_value, _mas.allocation_size, queue_id, _mas.stream_handle, - event_primary_key); -# endif - }; - }; -} -#endif - -postprocessing_callback -rocpd_post_processing::get_region_callback() const -{ - [[maybe_unused]] auto parse_args = []([[maybe_unused]] const std::string& arg_str) { -#if ROCPROFSYS_USE_ROCM > 0 - rocprofiler_sdk::function_args_t args; - const std::string delimiter = ";;"; - - auto split = [](const std::string& str, const std::string& _delimiter) { - std::vector tokens; - size_t start = 0; - size_t end = str.find(_delimiter); - - while(end != std::string::npos) - { - tokens.push_back(str.substr(start, end - start)); - start = end + _delimiter.length(); - end = str.find(_delimiter, start); - } - - return tokens; - }; - - if(arg_str.empty()) - { - return args; - } - - auto tokens = split(arg_str, delimiter); - - // Ensure the number of tokens is a multiple of 4 - if(tokens.size() % 4 != 0) - { - throw std::invalid_argument("Malformed argument string."); - } - - for(auto it = tokens.begin(); it != tokens.end(); it += 4) - { - rocprofiler_sdk::argument_info arg = { static_cast(std::stoi(*it)), - *(it + 1), *(it + 2), *(it + 3) }; - args.push_back(arg); - } - - return args; -#endif - }; - - return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) { -#if ROCPROFSYS_USE_ROCM > 0 - auto _rs = static_cast(parsed); - auto data_processor = get_data_processor(); - auto& n_info = node_info::get_instance(); - auto process = m_metadata.get_process_info(); - auto thread_primary_key = - data_processor->map_thread_id_to_primary_key(_rs.thread_id); - - auto name_primary_key = data_processor->insert_string(_rs.name.c_str()); - auto category_primary_key = data_processor->insert_string(_rs.category.c_str()); - - size_t stack_id = _rs.correlation_id_internal; - size_t parent_stack_id = _rs.correlation_id_ancestor; - size_t correlation_id = 0; - - auto event_primary_key = - data_processor->insert_event(category_primary_key, stack_id, parent_stack_id, - correlation_id, _rs.call_stack.c_str()); - - auto args = parse_args(_rs.args_str); - for(const auto& arg : args) - { - data_processor->insert_args(event_primary_key, arg.arg_number, - arg.arg_type.c_str(), arg.arg_name.c_str(), - arg.arg_value.c_str()); - } - - data_processor->insert_region(n_info.id, process.pid, thread_primary_key, - _rs.start_timestamp, _rs.end_timestamp, - name_primary_key, event_primary_key); -#endif - }; -} - -postprocessing_callback -rocpd_post_processing::get_backtrace_sample_callback() const -{ - return [&](const storage_parsed_type_base& parsed) { - auto _bts = static_cast(parsed); - auto data_processor = get_data_processor(); - auto& n_info = node_info::get_instance(); - auto process = m_metadata.get_process_info(); - auto thread_primary_key = - data_processor->map_thread_id_to_primary_key(_bts.thread_id); - auto name_primary_key = data_processor->insert_string(_bts.name.c_str()); - auto category_primary_key = data_processor->insert_string(_bts.category.c_str()); - - auto event_primary_key = data_processor->insert_event( - category_primary_key, 0, 0, 0, _bts.call_stack.c_str(), - _bts.line_info.c_str(), _bts.extdata.c_str()); - - data_processor->insert_region(n_info.id, process.pid, thread_primary_key, - _bts.start_timestamp, _bts.end_timestamp, - name_primary_key, event_primary_key); - data_processor->insert_sample(_bts.track_name.c_str(), _bts.start_timestamp, - event_primary_key); - }; -} - -postprocessing_callback -rocpd_post_processing::get_in_time_sample_callback() const -{ - return [&](const storage_parsed_type_base& parsed) { - auto _its = static_cast(parsed); - auto data_processor = get_data_processor(); - auto track_primary_key = data_processor->insert_string(_its.track_name.c_str()); - - auto event_id = data_processor->insert_event( - track_primary_key, _its.stack_id, _its.parent_stack_id, _its.correlation_id, - _its.call_stack.c_str(), _its.line_info.c_str(), _its.event_metadata.c_str()); - data_processor->insert_sample(_its.track_name.c_str(), _its.timestamp_ns, - event_id, "{}"); - }; -} -postprocessing_callback -rocpd_post_processing::get_pmc_event_with_sample_callback() const -{ - return [&](const storage_parsed_type_base& parsed) { - auto _pmc = static_cast(parsed); - auto data_processor = get_data_processor(); - auto track_primary_key = data_processor->insert_string(_pmc.track_name.c_str()); - - auto agent_primary_key = - m_agent_manager - .get_agent_by_id(_pmc.device_id, - static_cast(_pmc.device_type)) - .base_id; - - auto event_id = data_processor->insert_event( - track_primary_key, _pmc.stack_id, _pmc.parent_stack_id, _pmc.correlation_id, - _pmc.call_stack.c_str(), _pmc.line_info.c_str(), _pmc.event_metadata.c_str()); - data_processor->insert_sample(_pmc.track_name.c_str(), _pmc.timestamp_ns, - event_id, "{}"); - - data_processor->insert_pmc_event(event_id, agent_primary_key, - _pmc.pmc_info_name.c_str(), _pmc.value); - }; -} - -postprocessing_callback -rocpd_post_processing::get_amd_smi_sample_callback() const -{ - // Use the shared gpu_metrics_t from core/gpu_metrics.hpp - using gpu_metrics_t = gpu::gpu_metrics_t; - - return [&](const storage_parsed_type_base& parsed) { - auto _amd_smi = static_cast(parsed); - - auto data_processor = get_data_processor(); - - const auto* _name = trait::name::value; - auto name_primary_key = data_processor->insert_string(_name); - auto event_id = data_processor->insert_event(name_primary_key, 0, 0, 0); - - auto base_id = - m_agent_manager.get_agent_by_type_index(_amd_smi.device_id, agent_type::GPU) - .base_id; - - auto insert_event_and_sample = [&](bool enabled, const char* pmc_name, - const char* track_name, double value) { - if(!enabled) return; - data_processor->insert_pmc_event(event_id, base_id, pmc_name, value); - data_processor->insert_sample(track_name, _amd_smi.timestamp, event_id); - }; - - using pos = trace_cache::amd_smi_sample::settings_positions; - std::bitset<8> settings_bits(_amd_smi.settings); - bool is_busy_enabled = settings_bits.test(static_cast(pos::busy)); - bool is_temp_enabled = settings_bits.test(static_cast(pos::temp)); - bool is_power_enabled = settings_bits.test(static_cast(pos::power)); - bool is_mem_usage_enabled = settings_bits.test(static_cast(pos::mem_usage)); - - bool is_vcn_enabled = settings_bits.test(static_cast(pos::vcn_activity)); - bool is_jpeg_enabled = settings_bits.test(static_cast(pos::jpeg_activity)); - bool is_xgmi_enabled = settings_bits.test(static_cast(pos::xgmi)); - bool is_pcie_enabled = settings_bits.test(static_cast(pos::pcie)); - - insert_event_and_sample( - is_busy_enabled, trait::name::value, - info::annotate_with_device_id(_amd_smi.device_id) - .c_str(), - _amd_smi.gfx_activity); - insert_event_and_sample( - is_busy_enabled, trait::name::value, - info::annotate_with_device_id(_amd_smi.device_id) - .c_str(), - _amd_smi.umc_activity); - insert_event_and_sample( - is_busy_enabled, trait::name::value, - info::annotate_with_device_id(_amd_smi.device_id) - .c_str(), - _amd_smi.mm_activity); - insert_event_and_sample( - is_temp_enabled, trait::name::value, - info::annotate_with_device_id(_amd_smi.device_id) - .c_str(), - _amd_smi.temperature); - - insert_event_and_sample( - is_power_enabled, trait::name::value, - info::annotate_with_device_id(_amd_smi.device_id) - .c_str(), - _amd_smi.power); - insert_event_and_sample( - is_mem_usage_enabled, trait::name::value, - info::annotate_with_device_id( - _amd_smi.device_id) - .c_str(), - _amd_smi.mem_usage); - - if(!is_vcn_enabled && !is_jpeg_enabled && !is_xgmi_enabled && !is_pcie_enabled) - return; - - gpu_metrics_t gpu_metrics; - gpu::gpu_metrics_capabilities_t capabilities; - gpu::deserialize_gpu_metrics(_amd_smi.gpu_activity, gpu_metrics, is_vcn_enabled, - is_jpeg_enabled, is_xgmi_enabled, is_pcie_enabled, - capabilities); - - // Insert VCN and JPEG activity metrics - auto insert_decode_vector_metrics = [&](auto category, bool _is_enabled, - const std::vector& data, - std::optional _idx = - std::nullopt) { - if(!_is_enabled) return; - - using Category = std::decay_t; - - for(size_t i = 0; i < data.size(); ++i) - { - const auto value = data[i]; - if(value == std::numeric_limits::max()) continue; - - auto pmc_name = info::annotate_category(_idx, i); - auto track_name = - info::annotate_with_device_id(_amd_smi.device_id, _idx, i); - - insert_event_and_sample(_is_enabled, pmc_name.c_str(), track_name.c_str(), - static_cast(value)); - } - }; - - // Insert XGMI read/write data metrics - auto insert_xgmi_vector_metrics = [&](auto category, bool _is_enabled, - const std::vector& data, - std::optional _idx = std::nullopt) { - if(!_is_enabled) return; - - using Category = std::decay_t; - - for(size_t i = 0; i < data.size(); ++i) - { - const auto value = data[i]; - if(value == std::numeric_limits::max()) continue; - - auto pmc_name = info::annotate_category(_idx, i); - auto track_name = - info::annotate_with_device_id(_amd_smi.device_id, _idx, i); - - insert_event_and_sample(_is_enabled, pmc_name.c_str(), track_name.c_str(), - static_cast(value)); - } - }; - - // Insert VCN activity metrics - if(capabilities.flags.vcn_is_device_level_only) - { - // Device-level: use vcn_activity vector - insert_decode_vector_metrics(category::amd_smi_vcn_activity{}, is_vcn_enabled, - gpu_metrics.vcn_activity, std::nullopt); - } - else - { - // Per-XCP: iterate through actual XCPs in vcn_busy - for(size_t xcp = 0; xcp < gpu_metrics.vcn_busy.size(); ++xcp) - { - insert_decode_vector_metrics(category::amd_smi_vcn_activity{}, - is_vcn_enabled, gpu_metrics.vcn_busy[xcp], - xcp); - } - } - - // Insert JPEG activity metrics - if(capabilities.flags.jpeg_is_device_level_only) - { - // Device-level: use jpeg_activity vector - insert_decode_vector_metrics(category::amd_smi_jpeg_activity{}, - is_jpeg_enabled, gpu_metrics.jpeg_activity, - std::nullopt); - } - else - { - // Per-XCP: iterate through actual XCPs in jpeg_busy - for(size_t xcp = 0; xcp < gpu_metrics.jpeg_busy.size(); ++xcp) - { - insert_decode_vector_metrics(category::amd_smi_jpeg_activity{}, - is_jpeg_enabled, gpu_metrics.jpeg_busy[xcp], - xcp); - } - } - - // Insert XGMI metrics (scalar values) - insert_event_and_sample( - is_xgmi_enabled, trait::name::value, - info::annotate_with_device_id( - _amd_smi.device_id) - .c_str(), - gpu_metrics.xgmi_link_width); - - insert_event_and_sample( - is_xgmi_enabled, trait::name::value, - info::annotate_with_device_id( - _amd_smi.device_id) - .c_str(), - gpu_metrics.xgmi_link_speed); - - insert_xgmi_vector_metrics(category::amd_smi_xgmi_read_data{}, is_xgmi_enabled, - gpu_metrics.xgmi_read_data_acc, std::nullopt); - - insert_xgmi_vector_metrics(category::amd_smi_xgmi_write_data{}, is_xgmi_enabled, - gpu_metrics.xgmi_write_data_acc, std::nullopt); - - insert_event_and_sample( - is_pcie_enabled, trait::name::value, - info::annotate_with_device_id( - _amd_smi.device_id) - .c_str(), - gpu_metrics.pcie_link_width); - - insert_event_and_sample( - is_pcie_enabled, trait::name::value, - info::annotate_with_device_id( - _amd_smi.device_id) - .c_str(), - gpu_metrics.pcie_link_speed); - - insert_event_and_sample( - is_pcie_enabled, trait::name::value, - info::annotate_with_device_id( - _amd_smi.device_id) - .c_str(), - static_cast(gpu_metrics.pcie_bandwidth_acc)); - - insert_event_and_sample( - is_pcie_enabled, trait::name::value, - info::annotate_with_device_id( - _amd_smi.device_id) - .c_str(), - static_cast(gpu_metrics.pcie_bandwidth_inst)); - }; -} - -postprocessing_callback -rocpd_post_processing::get_cpu_freq_sample_callback() const -{ - struct core_freq_sample - { - size_t id; - float value; - }; - - auto deserialize_freqs = [](std::vector& buffer) { - std::vector result; - size_t offset = 0; - - while(offset + sizeof(float) + sizeof(size_t) <= buffer.size()) - { - core_freq_sample core_sample; - std::memcpy(&core_sample.id, buffer.data() + offset, sizeof(size_t)); - offset += sizeof(size_t); - std::memcpy(&core_sample.value, buffer.data() + offset, sizeof(float)); - offset += sizeof(float); - result.push_back(core_sample); - } - return result; - }; - - return [&](const storage_parsed_type_base& parsed) { - auto _cpu_freq_sample = static_cast(parsed); - - auto data_processor = get_data_processor(); - const auto* _name = trait::name::value; - auto name_primary_key = data_processor->insert_string(_name); - auto event_id = data_processor->insert_event(name_primary_key, 0, 0, 0); - - auto device_id = 0; - - auto base_id = - m_agent_manager.get_agent_by_type_index(device_id, agent_type::CPU).base_id; - - auto insert_event_and_sample = [&](const char* name, double value) { - data_processor->insert_pmc_event(event_id, base_id, name, value); - data_processor->insert_sample(name, _cpu_freq_sample.timestamp, event_id); - }; - - insert_event_and_sample(trait::name::value, - _cpu_freq_sample.page_rss); - insert_event_and_sample(trait::name::value, - _cpu_freq_sample.virt_mem_usage); - insert_event_and_sample(trait::name::value, - _cpu_freq_sample.peak_rss); - insert_event_and_sample(trait::name::value, - _cpu_freq_sample.context_switch_count); - insert_event_and_sample(trait::name::value, - _cpu_freq_sample.page_faults); - insert_event_and_sample(trait::name::value, - _cpu_freq_sample.user_mode_time); - insert_event_and_sample(trait::name::value, - _cpu_freq_sample.kernel_mode_time); - - auto get_track_name = [](const auto& cpu_id) { - return std::string(trait::name::value) + " [" + - std::to_string(cpu_id) + "]"; - }; - - auto core_freq_samples = deserialize_freqs(_cpu_freq_sample.freqs); - for(const auto& core : core_freq_samples) - { - insert_event_and_sample(get_track_name(core.id).c_str(), core.value); - } - }; -} - -rocpd_post_processing::rocpd_post_processing(metadata_registry& md, - agent_manager& agent_mngr, int pid, int ppid) -: m_metadata(md) -, m_agent_manager(agent_mngr) -, m_data_processor(std::make_shared( - std::make_shared(pid, ppid))) -{} - -void -rocpd_post_processing::register_parser_callback([[maybe_unused]] storage_parser& parser) -{ -#if ROCPROFSYS_USE_ROCM > 0 - if(!get_use_rocpd()) - { - return; - } - parser.register_type_callback(entry_type::region, get_region_callback()); - parser.register_type_callback(entry_type::kernel_dispatch, - get_kernel_dispatch_callback()); - parser.register_type_callback(entry_type::memory_copy, get_memory_copy_callback()); -# if(ROCPROFILER_VERSION >= 600) - parser.register_type_callback(entry_type::memory_alloc, - get_memory_allocate_callback()); -# endif - parser.register_type_callback(entry_type::in_time_sample, - get_in_time_sample_callback()); - parser.register_type_callback(entry_type::pmc_event_with_sample, - get_pmc_event_with_sample_callback()); - parser.register_type_callback(entry_type::amd_smi_sample, - get_amd_smi_sample_callback()); - parser.register_type_callback(entry_type::cpu_freq_sample, - get_cpu_freq_sample_callback()); - parser.register_type_callback(entry_type::backtrace_region_sample, - get_backtrace_sample_callback()); - ROCPROFSYS_DEBUG("Buffer parser callbacks are registered..\n"); - - parser.register_on_finished_callback( - std::make_unique>([this]() { - if(m_data_processor != nullptr) - { - m_data_processor->flush(); - } - })); -#endif -} - -void -rocpd_post_processing::post_process_metadata() -{ -#if ROCPROFSYS_USE_ROCM > 0 - if(!get_use_rocpd()) - { - return; - } - ROCPROFSYS_DEBUG("Post processing metadata..\n"); - auto data_processor = get_data_processor(); - auto n_info = node_info::get_instance(); - - data_processor->insert_node_info(n_info.id, n_info.hash, n_info.machine_id.c_str(), - n_info.system_name.c_str(), n_info.node_name.c_str(), - n_info.release.c_str(), n_info.version.c_str(), - n_info.machine.c_str(), n_info.domain_name.c_str()); - - auto process_info = m_metadata.get_process_info(); - data_processor->insert_process_info(n_info.id, process_info.ppid, process_info.pid, 0, - 0, process_info.start, process_info.end, - process_info.command.c_str(), "{}"); - - const auto& agents = m_agent_manager.get_agents(); - int counter = 0; - for(const auto& rocpd_agent : agents) - { - auto _base_id = data_processor->insert_agent( - n_info.id, process_info.pid, - ((rocpd_agent->type == agent_type::GPU) ? "GPU" : "CPU"), counter++, - rocpd_agent->logical_node_id, rocpd_agent->logical_node_type_id, - rocpd_agent->device_id, rocpd_agent->name.c_str(), - rocpd_agent->model_name.c_str(), rocpd_agent->vendor_name.c_str(), - rocpd_agent->product_name.c_str(), ""); - rocpd_agent->base_id = _base_id; - } - auto _string_list = m_metadata.get_string_list(); - for(auto& _string : _string_list) - { - data_processor->insert_string(std::string(_string).c_str()); - } - - auto _thread_info_list = m_metadata.get_thread_info_list(); - for(auto& t_info : _thread_info_list) - { - rocpd_insert_thread_id(t_info, n_info, process_info); - } - - auto _track_info_list = m_metadata.get_track_info_list(); - for(auto& track : _track_info_list) - { - auto thread_id = - track.thread_id.has_value() - ? std::make_optional(data_processor->map_thread_id_to_primary_key( - track.thread_id.value())) - : std::nullopt; - data_processor->insert_track(track.track_name.c_str(), n_info.id, - process_info.pid, thread_id); - } - - auto _code_object_list = m_metadata.get_code_object_list(); - for(const auto& code_object : _code_object_list) - { - auto dev_id = - m_agent_manager.get_agent_by_handle(get_handle_from_code_object(code_object)) - .base_id; - - const char* strg_type = "UNKNOWN"; - switch(code_object.storage_type) - { - case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE: strg_type = "FILE"; break; - case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY: strg_type = "MEMORY"; break; - default: break; - } - data_processor->insert_code_object(code_object.code_object_id, n_info.id, - process_info.pid, dev_id, code_object.uri, - code_object.load_base, code_object.load_size, - code_object.load_delta, strg_type); - } - - auto _kernel_symbols_list = m_metadata.get_kernel_symbol_list(); - for(const auto& kernel_symbol : _kernel_symbols_list) - { - auto kernel_name = tim::demangle(kernel_symbol.kernel_name); - data_processor->insert_kernel_symbol( - kernel_symbol.kernel_id, n_info.id, process_info.pid, - kernel_symbol.code_object_id, kernel_symbol.kernel_name, kernel_name.c_str(), - kernel_symbol.kernel_object, kernel_symbol.kernarg_segment_size, - kernel_symbol.kernarg_segment_alignment, kernel_symbol.group_segment_size, - kernel_symbol.private_segment_size, kernel_symbol.sgpr_count, - kernel_symbol.arch_vgpr_count, kernel_symbol.accum_vgpr_count); - - data_processor->insert_string(kernel_name.c_str()); - } - - auto _queue_list = m_metadata.get_queue_list(); - for(const auto& queue_handle : _queue_list) - { - std::stringstream ss; - ss << "Queue " << queue_handle; - data_processor->insert_queue_info(queue_handle, n_info.id, process_info.pid, - ss.str().c_str()); - } - - auto _stream_list = m_metadata.get_stream_list(); - for(const auto& stream_handle : _stream_list) - { - std::stringstream ss; - ss << "Stream " << stream_handle; - data_processor->insert_stream_info(stream_handle, n_info.id, process_info.pid, - ss.str().c_str()); - } - - auto buffer_info_list = m_metadata.get_buffer_name_info(); - for(const auto& buffer_info : buffer_info_list) - { - for(const auto& item : buffer_info.items()) - { - data_processor->insert_string(*item.second); - } - } - - auto callback_info_list = m_metadata.get_callback_tracing_info(); - for(const auto& cb_info : callback_info_list) - { - for(const auto& item : cb_info.items()) - { - data_processor->insert_string(*item.second); - } - } - - auto pmc_info_list = m_metadata.get_pmc_info_list(); - for(const auto& pmc_info : pmc_info_list) - { - const auto agent_primary_key = - m_agent_manager - .get_agent_by_type_index(pmc_info.agent_type_index, pmc_info.type) - .base_id; - - data_processor->insert_pmc_description( - n_info.id, process_info.pid, agent_primary_key, pmc_info.target_arch.c_str(), - pmc_info.event_code, pmc_info.instance_id, pmc_info.name.c_str(), - pmc_info.symbol.c_str(), pmc_info.description.c_str(), - pmc_info.long_description.c_str(), pmc_info.component.c_str(), - pmc_info.units.c_str(), pmc_info.value_type.c_str(), pmc_info.block.c_str(), - pmc_info.expression.c_str(), pmc_info.is_constant, pmc_info.is_derived); - } -#endif -} - -inline void -rocpd_post_processing::rocpd_insert_thread_id(info::thread& t_info, - const node_info& n_info, - const info::process& process_info) const -{ - const auto& extended_info = thread_info::get(t_info.thread_id, SystemTID); - if(extended_info.has_value()) - { - t_info.start = extended_info->get_start(); - t_info.end = extended_info->get_stop(); - } - - std::stringstream ss; - ss << "Thread " << t_info.thread_id; - get_data_processor()->insert_thread_info(n_info.id, process_info.ppid, - process_info.pid, t_info.thread_id, - ss.str().c_str(), t_info.start, t_info.end); -} - -} // namespace trace_cache -} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_processor.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_processor.cpp new file mode 100644 index 0000000000..206efc1af1 --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_processor.cpp @@ -0,0 +1,827 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "core/trace_cache/rocpd_processor.hpp" +#include "core/agent_manager.hpp" +#include "core/config.hpp" +#include "core/debug.hpp" +#include "core/gpu_metrics.hpp" +#include "core/node_info.hpp" +#include "core/rocpd/data_processor.hpp" +#include "core/rocpd/data_storage/database.hpp" +#include "core/trace_cache/metadata_registry.hpp" +#include "core/trace_cache/sample_type.hpp" +#include "library/thread_info.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#if ROCPROFSYS_USE_ROCM > 0 +# include "library/rocprofiler-sdk/fwd.hpp" +# include +# include +#endif + +namespace rocprofsys +{ +namespace trace_cache +{ +namespace +{ + +#if ROCPROFSYS_USE_ROCM > 0 +auto +get_handle_from_code_object( + const rocprofiler_callback_tracing_code_object_load_data_t& code_object) +{ +# if(ROCPROFILER_VERSION >= 600) + return code_object.agent_id.handle; +# else + return code_object.rocp_agent.handle; +# endif +} +#endif +} // namespace + +void +rocpd_processor_t::handle([[maybe_unused]] const kernel_dispatch_sample& _kds) +{ +#if ROCPROFSYS_USE_ROCM > 0 + auto& n_info = node_info::get_instance(); + auto process = m_metadata->get_process_info(); + auto agent_primary_key = + m_agent_manager->get_agent_by_handle(_kds.agent_id_handle).base_id; + + auto thread_primary_key = + m_data_processor->map_thread_id_to_primary_key(_kds.thread_id); + + auto category_id = m_data_processor->insert_string( + trait::name::value); + + auto kernel_symbol = m_metadata->get_kernel_symbol(_kds.kernel_id); + + if(!kernel_symbol.has_value()) + { + throw std::runtime_error("Kernel symbol is missing for kernel dispatch"); + } + + auto region_name_primary_key = m_data_processor->insert_string( + tim::demangle(kernel_symbol->kernel_name).c_str()); + + auto stack_id = _kds.correlation_id_internal; + auto parent_stack_id = _kds.correlation_id_ancestor; + auto correlation_id = 0; + + auto event_id = m_data_processor->insert_event(category_id, stack_id, parent_stack_id, + correlation_id); + + m_data_processor->insert_kernel_dispatch( + n_info.id, process.pid, thread_primary_key, agent_primary_key, _kds.kernel_id, + _kds.dispatch_id, _kds.queue_id_handle, _kds.stream_handle, _kds.start_timestamp, + _kds.end_timestamp, _kds.private_segment_size, _kds.group_segment_size, + _kds.workgroup_size_x, _kds.workgroup_size_y, _kds.workgroup_size_z, + _kds.grid_size_x, _kds.grid_size_y, _kds.grid_size_z, region_name_primary_key, + event_id); +#endif +} + +void +rocpd_processor_t::handle([[maybe_unused]] const memory_copy_sample& _mcs) +{ +#if ROCPROFSYS_USE_ROCM > 0 + auto& n_info = node_info::get_instance(); + auto process = m_metadata->get_process_info(); + + auto _name = std::string{ m_metadata->get_buffer_name_info().at( + static_cast(_mcs.kind), + static_cast(_mcs.operation)) }; + auto name_primary_key = m_data_processor->insert_string(_name.c_str()); + + auto category_primary_key = + m_data_processor->insert_string(trait::name::value); + + auto thread_primary_key = + m_data_processor->map_thread_id_to_primary_key(_mcs.thread_id); + + auto dst_agent_primary_key = + m_agent_manager->get_agent_by_handle(_mcs.dst_agent_id_handle).base_id; + auto src_agent_primary_key = + m_agent_manager->get_agent_by_handle(_mcs.src_agent_id_handle).base_id; + + auto stack_id = _mcs.correlation_id_internal; + auto parent_stack_id = _mcs.correlation_id_ancestor; + auto correlation_id = 0; + auto queue_id = 0; + + auto event_primary_key = m_data_processor->insert_event( + category_primary_key, stack_id, parent_stack_id, correlation_id); + + m_data_processor->insert_memory_copy( + n_info.id, process.pid, thread_primary_key, _mcs.start_timestamp, + _mcs.end_timestamp, name_primary_key, dst_agent_primary_key, + _mcs.dst_address_value, src_agent_primary_key, _mcs.src_address_value, _mcs.bytes, + queue_id, _mcs.stream_handle, name_primary_key, event_primary_key); +#endif +} + +void +rocpd_processor_t::handle([[maybe_unused]] const memory_allocate_sample& _mas) +{ +#if ROCPROFSYS_USE_ROCM > 0 && (ROCPROFILER_VERSION >= 600) + static auto memtype_to_db = + [](std::string_view memory_type) -> std::pair { + constexpr auto MEMORY_PREFIX = std::string_view{ "MEMORY_ALLOCATION_" }; + constexpr auto SCRATCH_PREFIX = std::string_view{ "SCRATCH_MEMORY_" }; + constexpr auto VMEM_PREFIX = std::string_view{ "VMEM_" }; + constexpr auto ASYNC_PREFIX = std::string_view{ "ASYNC_" }; + + std::string _type; + std::string _level; + if(memory_type.find(MEMORY_PREFIX) == 0) + { + _type = memory_type.substr(MEMORY_PREFIX.length()); + if(_type.find(VMEM_PREFIX) == 0) + { + _type = _type.substr(VMEM_PREFIX.length()); + _level = "VIRTUAL"; + } + else + { + _level = "REAL"; + } + } + else if(memory_type.find(SCRATCH_PREFIX) == 0) + { + _type = memory_type.substr(SCRATCH_PREFIX.length()); + _level = "SCRATCH"; + if(memory_type.find(ASYNC_PREFIX) == 0) + { + _type = memory_type.substr(ASYNC_PREFIX.length()); // RECLAIM + } + } + + if(_type == "ALLOCATE") + { + _type = "ALLOC"; + } + + return std::make_pair(_type, _level); + }; + + auto& n_info = node_info::get_instance(); + auto process = m_metadata->get_process_info(); + auto thread_primary_key = + m_data_processor->map_thread_id_to_primary_key(_mas.thread_id); + auto agent_primary_key = std::optional{}; + + const auto invalid_context = ROCPROFILER_CONTEXT_NONE; + if(_mas.agent_id_handle != invalid_context.handle) + { + { + agent_primary_key = + m_agent_manager->get_agent_by_handle(_mas.agent_id_handle).base_id; + } + const auto* _name = m_metadata->get_buffer_name_info().at( + static_cast(_mas.kind), + static_cast(_mas.operation)); + + auto [type, level] = memtype_to_db(_name); + + auto stack_id = _mas.correlation_id_internal; + auto parent_stack_id = _mas.correlation_id_ancestor; + auto correlation_id = 0; + auto queue_id = 0; + + auto category_primary_key = m_data_processor->insert_string( + trait::name::value); + + auto event_primary_key = m_data_processor->insert_event( + category_primary_key, stack_id, parent_stack_id, correlation_id); + + m_data_processor->insert_memory_alloc( + n_info.id, process.pid, thread_primary_key, agent_primary_key, type.c_str(), + level.c_str(), _mas.start_timestamp, _mas.end_timestamp, _mas.address_value, + _mas.allocation_size, queue_id, _mas.stream_handle, event_primary_key); + } +#endif +} + +void +rocpd_processor_t::handle([[maybe_unused]] const region_sample& _rs) +{ +#if ROCPROFSYS_USE_ROCM > 0 + static auto parse_args = []([[maybe_unused]] const std::string& arg_str) { + rocprofiler_sdk::function_args_t args; + const std::string delimiter = ";;"; + + auto split = [](const std::string& str, const std::string& _delimiter) { + std::vector tokens; + size_t start = 0; + size_t end = str.find(_delimiter); + + while(end != std::string::npos) + { + tokens.push_back(str.substr(start, end - start)); + start = end + _delimiter.length(); + end = str.find(_delimiter, start); + } + + return tokens; + }; + + if(arg_str.empty()) + { + return args; + } + + auto tokens = split(arg_str, delimiter); + + // Ensure the number of tokens is a multiple of 4 + if(tokens.size() % 4 != 0) + { + throw std::invalid_argument("Malformed argument string."); + } + + for(auto it = tokens.begin(); it != tokens.end(); it += 4) + { + rocprofiler_sdk::argument_info arg = { static_cast(std::stoi(*it)), + *(it + 1), *(it + 2), *(it + 3) }; + args.push_back(arg); + } + + return args; + }; + + auto& n_info = node_info::get_instance(); + auto process = m_metadata->get_process_info(); + auto thread_primary_key = + m_data_processor->map_thread_id_to_primary_key(_rs.thread_id); + + auto name_primary_key = m_data_processor->insert_string(_rs.name.c_str()); + auto category_primary_key = m_data_processor->insert_string(_rs.category.c_str()); + + size_t stack_id = _rs.correlation_id_internal; + size_t parent_stack_id = _rs.correlation_id_ancestor; + size_t correlation_id = 0; + + auto event_primary_key = + m_data_processor->insert_event(category_primary_key, stack_id, parent_stack_id, + correlation_id, _rs.call_stack.c_str()); + + auto args = parse_args(_rs.args_str); + for(const auto& arg : args) + { + m_data_processor->insert_args(event_primary_key, arg.arg_number, + arg.arg_type.c_str(), arg.arg_name.c_str(), + arg.arg_value.c_str()); + } + + m_data_processor->insert_region(n_info.id, process.pid, thread_primary_key, + _rs.start_timestamp, _rs.end_timestamp, + name_primary_key, event_primary_key); +#endif +} + +void +rocpd_processor_t::handle([[maybe_unused]] const backtrace_region_sample& _bts) +{ +#if ROCPROFSYS_USE_ROCM > 0 + auto& n_info = node_info::get_instance(); + auto process = m_metadata->get_process_info(); + auto thread_primary_key = + m_data_processor->map_thread_id_to_primary_key(_bts.thread_id); + auto name_primary_key = m_data_processor->insert_string(_bts.name.c_str()); + auto category_primary_key = m_data_processor->insert_string(_bts.category.c_str()); + + auto event_primary_key = m_data_processor->insert_event( + category_primary_key, 0, 0, 0, _bts.call_stack.c_str(), _bts.line_info.c_str(), + _bts.extdata.c_str()); + + m_data_processor->insert_region(n_info.id, process.pid, thread_primary_key, + _bts.start_timestamp, _bts.end_timestamp, + name_primary_key, event_primary_key); + m_data_processor->insert_sample(_bts.track_name.c_str(), _bts.start_timestamp, + event_primary_key); +#endif +} + +void +rocpd_processor_t::handle([[maybe_unused]] const in_time_sample& _its) +{ +#if ROCPROFSYS_USE_ROCM > 0 + auto track_primary_key = m_data_processor->insert_string(_its.track_name.c_str()); + + auto event_id = m_data_processor->insert_event( + track_primary_key, _its.stack_id, _its.parent_stack_id, _its.correlation_id, + _its.call_stack.c_str(), _its.line_info.c_str(), _its.event_metadata.c_str()); + m_data_processor->insert_sample(_its.track_name.c_str(), _its.timestamp_ns, event_id, + "{}"); +#endif +} + +void +rocpd_processor_t::handle([[maybe_unused]] const pmc_event_with_sample& _pmc) +{ +#if ROCPROFSYS_USE_ROCM > 0 + auto track_primary_key = m_data_processor->insert_string(_pmc.track_name.c_str()); + + auto agent_primary_key = + m_agent_manager + ->get_agent_by_id(_pmc.device_id, static_cast(_pmc.device_type)) + .base_id; + + auto event_id = m_data_processor->insert_event( + track_primary_key, _pmc.stack_id, _pmc.parent_stack_id, _pmc.correlation_id, + _pmc.call_stack.c_str(), _pmc.line_info.c_str(), _pmc.event_metadata.c_str()); + m_data_processor->insert_sample(_pmc.track_name.c_str(), _pmc.timestamp_ns, event_id, + "{}"); + + m_data_processor->insert_pmc_event(event_id, agent_primary_key, + _pmc.pmc_info_name.c_str(), _pmc.value); +#endif +} + +void +rocpd_processor_t::handle([[maybe_unused]] const amd_smi_sample& _amd_smi) +{ +#if ROCPROFSYS_USE_ROCM > 0 + + const auto* _name = trait::name::value; + auto name_primary_key = m_data_processor->insert_string(_name); + auto event_id = m_data_processor->insert_event(name_primary_key, 0, 0, 0); + + auto base_id = + m_agent_manager->get_agent_by_type_index(_amd_smi.device_id, agent_type::GPU) + .base_id; + + auto insert_event_and_sample = [&](bool enabled, const char* pmc_name, + const char* track_name, double value) { + if(!enabled) return; + m_data_processor->insert_pmc_event(event_id, base_id, pmc_name, value); + m_data_processor->insert_sample(track_name, _amd_smi.timestamp, event_id); + }; + + using pos = trace_cache::amd_smi_sample::settings_positions; + std::bitset<8> settings_bits(_amd_smi.settings); + bool is_busy_enabled = settings_bits.test(static_cast(pos::busy)); + bool is_temp_enabled = settings_bits.test(static_cast(pos::temp)); + bool is_power_enabled = settings_bits.test(static_cast(pos::power)); + bool is_mem_usage_enabled = settings_bits.test(static_cast(pos::mem_usage)); + + bool is_vcn_enabled = settings_bits.test(static_cast(pos::vcn_activity)); + bool is_jpeg_enabled = settings_bits.test(static_cast(pos::jpeg_activity)); + bool is_xgmi_enabled = settings_bits.test(static_cast(pos::xgmi)); + bool is_pcie_enabled = settings_bits.test(static_cast(pos::pcie)); + + insert_event_and_sample( + is_busy_enabled, trait::name::value, + info::annotate_with_device_id(_amd_smi.device_id) + .c_str(), + _amd_smi.gfx_activity); + insert_event_and_sample( + is_busy_enabled, trait::name::value, + info::annotate_with_device_id(_amd_smi.device_id) + .c_str(), + _amd_smi.umc_activity); + insert_event_and_sample( + is_busy_enabled, trait::name::value, + info::annotate_with_device_id(_amd_smi.device_id) + .c_str(), + _amd_smi.mm_activity); + insert_event_and_sample( + is_temp_enabled, trait::name::value, + info::annotate_with_device_id(_amd_smi.device_id).c_str(), + _amd_smi.temperature); + + insert_event_and_sample( + is_power_enabled, trait::name::value, + info::annotate_with_device_id(_amd_smi.device_id) + .c_str(), + _amd_smi.power); + insert_event_and_sample( + is_mem_usage_enabled, trait::name::value, + info::annotate_with_device_id(_amd_smi.device_id) + .c_str(), + _amd_smi.mem_usage); + + if(!is_vcn_enabled && !is_jpeg_enabled && !is_xgmi_enabled && !is_pcie_enabled) + return; + + gpu::gpu_metrics_t gpu_metrics; + gpu::gpu_metrics_capabilities_t capabilities; + gpu::deserialize_gpu_metrics(_amd_smi.gpu_activity, gpu_metrics, is_vcn_enabled, + is_jpeg_enabled, is_xgmi_enabled, is_pcie_enabled, + capabilities); + + // Insert VCN and JPEG activity metrics + auto insert_decode_vector_metrics = [&](auto category, bool _is_enabled, + const std::vector& data, + std::optional _idx = std::nullopt) { + if(!_is_enabled) return; + + using Category = std::decay_t; + + for(size_t i = 0; i < data.size(); ++i) + { + const auto value = data[i]; + if(value == std::numeric_limits::max()) continue; + + auto pmc_name = info::annotate_category(_idx, i); + auto track_name = + info::annotate_with_device_id(_amd_smi.device_id, _idx, i); + + insert_event_and_sample(_is_enabled, pmc_name.c_str(), track_name.c_str(), + static_cast(value)); + } + }; + + // Insert XGMI read/write data metrics + auto insert_xgmi_vector_metrics = [&](auto category, bool _is_enabled, + const std::vector& data, + std::optional _idx = std::nullopt) { + if(!_is_enabled) return; + + using Category = std::decay_t; + + for(size_t i = 0; i < data.size(); ++i) + { + const auto value = data[i]; + if(value == std::numeric_limits::max()) continue; + + auto pmc_name = info::annotate_category(_idx, i); + auto track_name = + info::annotate_with_device_id(_amd_smi.device_id, _idx, i); + + insert_event_and_sample(_is_enabled, pmc_name.c_str(), track_name.c_str(), + static_cast(value)); + } + }; + + // Insert VCN activity metrics + if(capabilities.flags.vcn_is_device_level_only) + { + // Device-level: use vcn_activity vector + insert_decode_vector_metrics(category::amd_smi_vcn_activity{}, is_vcn_enabled, + gpu_metrics.vcn_activity, std::nullopt); + } + else + { + // Per-XCP: iterate through actual XCPs in vcn_busy + for(size_t xcp = 0; xcp < gpu_metrics.vcn_busy.size(); ++xcp) + { + insert_decode_vector_metrics(category::amd_smi_vcn_activity{}, is_vcn_enabled, + gpu_metrics.vcn_busy[xcp], xcp); + } + } + + // Insert JPEG activity metrics + if(capabilities.flags.jpeg_is_device_level_only) + { + // Device-level: use jpeg_activity vector + insert_decode_vector_metrics(category::amd_smi_jpeg_activity{}, is_jpeg_enabled, + gpu_metrics.jpeg_activity, std::nullopt); + } + else + { + // Per-XCP: iterate through actual XCPs in jpeg_busy + for(size_t xcp = 0; xcp < gpu_metrics.jpeg_busy.size(); ++xcp) + { + insert_decode_vector_metrics(category::amd_smi_jpeg_activity{}, + is_jpeg_enabled, gpu_metrics.jpeg_busy[xcp], + xcp); + } + } + + // Insert XGMI metrics (scalar values) + insert_event_and_sample( + is_xgmi_enabled, trait::name::value, + info::annotate_with_device_id( + _amd_smi.device_id) + .c_str(), + gpu_metrics.xgmi_link_width); + + insert_event_and_sample( + is_xgmi_enabled, trait::name::value, + info::annotate_with_device_id( + _amd_smi.device_id) + .c_str(), + gpu_metrics.xgmi_link_speed); + + insert_xgmi_vector_metrics(category::amd_smi_xgmi_read_data{}, is_xgmi_enabled, + gpu_metrics.xgmi_read_data_acc, std::nullopt); + + insert_xgmi_vector_metrics(category::amd_smi_xgmi_write_data{}, is_xgmi_enabled, + gpu_metrics.xgmi_write_data_acc, std::nullopt); + + insert_event_and_sample( + is_pcie_enabled, trait::name::value, + info::annotate_with_device_id( + _amd_smi.device_id) + .c_str(), + gpu_metrics.pcie_link_width); + + insert_event_and_sample( + is_pcie_enabled, trait::name::value, + info::annotate_with_device_id( + _amd_smi.device_id) + .c_str(), + gpu_metrics.pcie_link_speed); + + insert_event_and_sample( + is_pcie_enabled, trait::name::value, + info::annotate_with_device_id( + _amd_smi.device_id) + .c_str(), + static_cast(gpu_metrics.pcie_bandwidth_acc)); + + insert_event_and_sample( + is_pcie_enabled, trait::name::value, + info::annotate_with_device_id( + _amd_smi.device_id) + .c_str(), + static_cast(gpu_metrics.pcie_bandwidth_inst)); +#endif +} + +void +rocpd_processor_t::handle([[maybe_unused]] const cpu_freq_sample& _cpu_freq_sample) +{ +#if ROCPROFSYS_USE_ROCM > 0 + struct core_freq_sample + { + size_t id; + float value; + }; + + auto deserialize_freqs = [](const std::vector& buffer) { + std::vector result; + size_t offset = 0; + + while(offset + sizeof(float) + sizeof(size_t) <= buffer.size()) + { + core_freq_sample core_sample; + std::memcpy(&core_sample.id, buffer.data() + offset, sizeof(size_t)); + offset += sizeof(size_t); + std::memcpy(&core_sample.value, buffer.data() + offset, sizeof(float)); + offset += sizeof(float); + result.push_back(core_sample); + } + return result; + }; + + const auto* _name = trait::name::value; + auto name_primary_key = m_data_processor->insert_string(_name); + auto event_id = m_data_processor->insert_event(name_primary_key, 0, 0, 0); + + auto device_id = 0; + + auto base_id = + m_agent_manager->get_agent_by_type_index(device_id, agent_type::CPU).base_id; + + auto insert_event_and_sample = [&](const char* name, double value) { + m_data_processor->insert_pmc_event(event_id, base_id, name, value); + m_data_processor->insert_sample(name, _cpu_freq_sample.timestamp, event_id); + }; + + insert_event_and_sample(trait::name::value, + _cpu_freq_sample.page_rss); + insert_event_and_sample(trait::name::value, + _cpu_freq_sample.virt_mem_usage); + insert_event_and_sample(trait::name::value, + _cpu_freq_sample.peak_rss); + insert_event_and_sample(trait::name::value, + _cpu_freq_sample.context_switch_count); + insert_event_and_sample(trait::name::value, + _cpu_freq_sample.page_faults); + insert_event_and_sample(trait::name::value, + _cpu_freq_sample.user_mode_time); + insert_event_and_sample(trait::name::value, + _cpu_freq_sample.kernel_mode_time); + + auto get_track_name = [](const auto& cpu_id) { + return std::string(trait::name::value) + " [" + + std::to_string(cpu_id) + "]"; + }; + + auto core_freq_samples = deserialize_freqs(_cpu_freq_sample.freqs); + for(const auto& core : core_freq_samples) + { + insert_event_and_sample(get_track_name(core.id).c_str(), core.value); + } +#endif +} + +rocpd_processor_t::rocpd_processor_t(const std::shared_ptr& md, + const std::shared_ptr& agent_mngr, + int pid, int ppid) +: processor_t() +, m_metadata(md) +, m_agent_manager(agent_mngr) +, m_data_processor(std::make_shared( + std::make_shared(pid, ppid))) +{} + +void +rocpd_processor_t::prepare_for_processing() +{ + post_process_metadata(); +} + +void +rocpd_processor_t::finalize_processing() +{ + m_data_processor->flush(); +} + +void +rocpd_processor_t::post_process_metadata() +{ +#if ROCPROFSYS_USE_ROCM > 0 + if(!get_use_rocpd()) + { + return; + } + ROCPROFSYS_DEBUG("Post processing metadata..\n"); + auto n_info = node_info::get_instance(); + + m_data_processor->insert_node_info( + n_info.id, n_info.hash, n_info.machine_id.c_str(), n_info.system_name.c_str(), + n_info.node_name.c_str(), n_info.release.c_str(), n_info.version.c_str(), + n_info.machine.c_str(), n_info.domain_name.c_str()); + + auto process_info = m_metadata->get_process_info(); + m_data_processor->insert_process_info(n_info.id, process_info.ppid, process_info.pid, + 0, 0, process_info.start, process_info.end, + process_info.command.c_str(), "{}"); + + const auto& agents = m_agent_manager->get_agents(); + int counter = 0; + for(const auto& rocpd_agent : agents) + { + auto _base_id = m_data_processor->insert_agent( + n_info.id, process_info.pid, + ((rocpd_agent->type == agent_type::GPU) ? "GPU" : "CPU"), counter++, + rocpd_agent->logical_node_id, rocpd_agent->logical_node_type_id, + rocpd_agent->device_id, rocpd_agent->name.c_str(), + rocpd_agent->model_name.c_str(), rocpd_agent->vendor_name.c_str(), + rocpd_agent->product_name.c_str(), ""); + rocpd_agent->base_id = _base_id; + } + auto _string_list = m_metadata->get_string_list(); + for(auto& _string : _string_list) + { + m_data_processor->insert_string(std::string(_string).c_str()); + } + + auto _thread_info_list = m_metadata->get_thread_info_list(); + for(auto& t_info : _thread_info_list) + { + insert_thread_id(t_info, n_info, process_info); + } + + auto _track_info_list = m_metadata->get_track_info_list(); + for(auto& track : _track_info_list) + { + auto thread_id = track.thread_id.has_value() + ? std::make_optional( + m_data_processor->map_thread_id_to_primary_key( + track.thread_id.value())) + : std::nullopt; + m_data_processor->insert_track(track.track_name.c_str(), n_info.id, + process_info.pid, thread_id); + } + + auto _code_object_list = m_metadata->get_code_object_list(); + for(const auto& code_object : _code_object_list) + { + auto dev_id = + m_agent_manager->get_agent_by_handle(get_handle_from_code_object(code_object)) + .base_id; + + const char* strg_type = "UNKNOWN"; + switch(code_object.storage_type) + { + case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE: strg_type = "FILE"; break; + case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY: strg_type = "MEMORY"; break; + default: break; + } + m_data_processor->insert_code_object(code_object.code_object_id, n_info.id, + process_info.pid, dev_id, code_object.uri, + code_object.load_base, code_object.load_size, + code_object.load_delta, strg_type); + } + + auto _kernel_symbols_list = m_metadata->get_kernel_symbol_list(); + for(const auto& kernel_symbol : _kernel_symbols_list) + { + auto kernel_name = tim::demangle(kernel_symbol.kernel_name); + m_data_processor->insert_kernel_symbol( + kernel_symbol.kernel_id, n_info.id, process_info.pid, + kernel_symbol.code_object_id, kernel_symbol.kernel_name, kernel_name.c_str(), + kernel_symbol.kernel_object, kernel_symbol.kernarg_segment_size, + kernel_symbol.kernarg_segment_alignment, kernel_symbol.group_segment_size, + kernel_symbol.private_segment_size, kernel_symbol.sgpr_count, + kernel_symbol.arch_vgpr_count, kernel_symbol.accum_vgpr_count); + + m_data_processor->insert_string(kernel_name.c_str()); + } + + auto _queue_list = m_metadata->get_queue_list(); + for(const auto& queue_handle : _queue_list) + { + std::stringstream ss; + ss << "Queue " << queue_handle; + m_data_processor->insert_queue_info(queue_handle, n_info.id, process_info.pid, + ss.str().c_str()); + } + + auto _stream_list = m_metadata->get_stream_list(); + for(const auto& stream_handle : _stream_list) + { + std::stringstream ss; + ss << "Stream " << stream_handle; + m_data_processor->insert_stream_info(stream_handle, n_info.id, process_info.pid, + ss.str().c_str()); + } + + auto buffer_info_list = m_metadata->get_buffer_name_info(); + for(const auto& buffer_info : buffer_info_list) + { + for(const auto& item : buffer_info.items()) + { + m_data_processor->insert_string(*item.second); + } + } + + auto callback_info_list = m_metadata->get_callback_tracing_info(); + for(const auto& cb_info : callback_info_list) + { + for(const auto& item : cb_info.items()) + { + m_data_processor->insert_string(*item.second); + } + } + + auto pmc_info_list = m_metadata->get_pmc_info_list(); + for(const auto& pmc_info : pmc_info_list) + { + const auto agent_primary_key = + m_agent_manager + ->get_agent_by_type_index(pmc_info.agent_type_index, pmc_info.type) + .base_id; + + m_data_processor->insert_pmc_description( + n_info.id, process_info.pid, agent_primary_key, pmc_info.target_arch.c_str(), + pmc_info.event_code, pmc_info.instance_id, pmc_info.name.c_str(), + pmc_info.symbol.c_str(), pmc_info.description.c_str(), + pmc_info.long_description.c_str(), pmc_info.component.c_str(), + pmc_info.units.c_str(), pmc_info.value_type.c_str(), pmc_info.block.c_str(), + pmc_info.expression.c_str(), pmc_info.is_constant, pmc_info.is_derived); + } +#endif +} + +inline void +rocpd_processor_t::insert_thread_id(info::thread& t_info, const node_info& n_info, + const info::process& process_info) +{ + const auto& extended_info = thread_info::get(t_info.thread_id, SystemTID); + if(extended_info.has_value()) + { + t_info.start = extended_info->get_start(); + t_info.end = extended_info->get_stop(); + } + + std::stringstream ss; + ss << "Thread " << t_info.thread_id; + m_data_processor->insert_thread_info(n_info.id, process_info.ppid, process_info.pid, + t_info.thread_id, ss.str().c_str(), t_info.start, + t_info.end); +} + +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_processor.hpp similarity index 54% rename from projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.hpp rename to projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_processor.hpp index 91331a0ff5..68f299a3e6 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_post_processing.hpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/rocpd_processor.hpp @@ -25,44 +25,44 @@ #include "core/node_info.hpp" #include "core/rocpd/data_processor.hpp" #include "core/trace_cache/metadata_registry.hpp" -#include "core/trace_cache/storage_parser.hpp" +#include "core/trace_cache/sample_processor.hpp" + +#include "trace_cache/sample_type.hpp" namespace rocprofsys { namespace trace_cache { -class rocpd_post_processing +class rocpd_processor_t : public processor_t { public: - rocpd_post_processing(metadata_registry& metadata, agent_manager& agent_mngr, int pid, - int ppid); + rocpd_processor_t(const std::shared_ptr& metadata, + const std::shared_ptr& agent_mngr, int pid, + int ppid); - void register_parser_callback(storage_parser& parser); - void post_process_metadata(); + void prepare_for_processing(); + void finalize_processing(); - std::shared_ptr get_data_processor() const; + void handle(const kernel_dispatch_sample& sample); + void handle(const memory_copy_sample& sample); + void handle(const memory_allocate_sample& sample); + void handle(const region_sample& sample); + void handle(const in_time_sample& sample); + void handle(const pmc_event_with_sample& sample); + void handle(const amd_smi_sample& sample); + void handle(const cpu_freq_sample& sample); + void handle(const backtrace_region_sample& sample); private: using primary_key = size_t; - inline void rocpd_insert_thread_id(info::thread& t_info, const node_info& n_info, - const info::process& process_info) const; + void post_process_metadata(); + inline void insert_thread_id(info::thread& t_info, const node_info& n_info, + const info::process& process_info); - postprocessing_callback get_kernel_dispatch_callback() const; - postprocessing_callback get_memory_copy_callback() const; -#if(ROCPROFILER_VERSION >= 600) - postprocessing_callback get_memory_allocate_callback() const; -#endif - postprocessing_callback get_region_callback() const; - postprocessing_callback get_in_time_sample_callback() const; - postprocessing_callback get_pmc_event_with_sample_callback() const; - postprocessing_callback get_amd_smi_sample_callback() const; - postprocessing_callback get_cpu_freq_sample_callback() const; - postprocessing_callback get_backtrace_sample_callback() const; - - metadata_registry& m_metadata; - agent_manager& m_agent_manager; + std::shared_ptr m_metadata; + std::shared_ptr m_agent_manager; std::shared_ptr m_data_processor; }; diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/sample_processor.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/sample_processor.hpp new file mode 100644 index 0000000000..b55d80740b --- /dev/null +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/sample_processor.hpp @@ -0,0 +1,310 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once +#include "core/trace_cache/cacheable.hpp" +#include "core/trace_cache/sample_type.hpp" + +#if ROCPROFSYS_USE_ROCM > 0 +# include +#endif + +#include + +namespace rocprofsys +{ +namespace trace_cache +{ + +template +struct processor_t +{ + void handle(const kernel_dispatch_sample& sample) + { + static_cast(this)->handle(sample); + } + + void handle(const memory_copy_sample& sample) + { + static_cast(this)->handle(sample); + } + +#if(ROCPROFILER_VERSION >= 600) + void handle(const memory_allocate_sample& sample) + { + static_cast(this)->handle(sample); + } +#endif + + void handle(const region_sample& sample) { static_cast(this)->handle(sample); } + + void handle(const in_time_sample& sample) { static_cast(this)->handle(sample); } + + void handle(const pmc_event_with_sample& sample) + { + static_cast(this)->handle(sample); + } + + void handle(const amd_smi_sample& sample) { static_cast(this)->handle(sample); } + + void handle(const cpu_freq_sample& sample) { static_cast(this)->handle(sample); } + + void handle(const backtrace_region_sample& sample) + { + static_cast(this)->handle(sample); + } + + void prepare_for_processing() { static_cast(this)->prepare_for_processing(); } + + void finalize_processing() { static_cast(this)->finalize_processing(); } + +protected: + ~processor_t() = default; +}; + +struct processor_view_t +{ + using kernel_dispatch_fn_t = void (*)(void*, const kernel_dispatch_sample&) noexcept; + using memory_copy_fn_t = void (*)(void*, const memory_copy_sample&) noexcept; +#if(ROCPROFILER_VERSION >= 600) + using memory_allocate_fn_t = void (*)(void*, const memory_allocate_sample&) noexcept; +#endif + using region_fn_t = void (*)(void*, const region_sample&) noexcept; + using in_time_sample_fn_t = void (*)(void*, const in_time_sample&) noexcept; + using pmc_event_fn_t = void (*)(void*, const pmc_event_with_sample&) noexcept; + using amd_smi_sample_fn_t = void (*)(void*, const amd_smi_sample&) noexcept; + using cpu_freq_sample_fn_t = void (*)(void*, const cpu_freq_sample&) noexcept; + using backtrace_region_fn_t = void (*)(void*, + const backtrace_region_sample&) noexcept; + using prepare_for_processing_fn_t = void (*)(void*) noexcept; + using finalize_processing_fn_t = void (*)(void*) noexcept; + + struct vtable_t + { + kernel_dispatch_fn_t handle_kernel_dispatch; + memory_copy_fn_t handle_memory_copy; +#if(ROCPROFILER_VERSION >= 600) + memory_allocate_fn_t handle_memory_allocate; +#endif + region_fn_t handle_region; + in_time_sample_fn_t handle_in_time_sample; + pmc_event_fn_t handle_pmc_event; + amd_smi_sample_fn_t handle_amd_smi_sample; + cpu_freq_sample_fn_t handle_cpu_freq_sample; + backtrace_region_fn_t handle_backtrace_region; + prepare_for_processing_fn_t prepare_for_processing; + finalize_processing_fn_t finalize_processing; + }; + + template + explicit processor_view_t(T& t) noexcept + : m_object{ std::addressof(t) } + , m_vtable{ std::addressof(get_vtable_for_type()) } + { + static_assert(std::is_base_of, T>::value, + "Type must be derived from processor_t"); + } + + processor_view_t(const processor_view_t&) noexcept = default; + processor_view_t(processor_view_t&&) noexcept = default; + processor_view_t& operator=(const processor_view_t&) noexcept = default; + processor_view_t& operator=(processor_view_t&&) noexcept = default; + + ROCPROFSYS_INLINE void handle(const kernel_dispatch_sample& sample) const noexcept + { + m_vtable->handle_kernel_dispatch(m_object, sample); + } + + ROCPROFSYS_INLINE void handle(const memory_copy_sample& sample) const noexcept + { + m_vtable->handle_memory_copy(m_object, sample); + } + +#if(ROCPROFILER_VERSION >= 600) + ROCPROFSYS_INLINE void handle(const memory_allocate_sample& sample) const noexcept + { + m_vtable->handle_memory_allocate(m_object, sample); + } +#endif + + ROCPROFSYS_INLINE void handle(const region_sample& sample) const noexcept + { + m_vtable->handle_region(m_object, sample); + } + + ROCPROFSYS_INLINE void handle(const in_time_sample& sample) const noexcept + { + m_vtable->handle_in_time_sample(m_object, sample); + } + + ROCPROFSYS_INLINE void handle(const pmc_event_with_sample& sample) const noexcept + { + m_vtable->handle_pmc_event(m_object, sample); + } + + ROCPROFSYS_INLINE void handle(const amd_smi_sample& sample) const noexcept + { + m_vtable->handle_amd_smi_sample(m_object, sample); + } + + ROCPROFSYS_INLINE void handle(const cpu_freq_sample& sample) const noexcept + { + m_vtable->handle_cpu_freq_sample(m_object, sample); + } + + ROCPROFSYS_INLINE void handle(const backtrace_region_sample& sample) const noexcept + { + m_vtable->handle_backtrace_region(m_object, sample); + } + + ROCPROFSYS_INLINE void prepare_for_processing() const noexcept + { + m_vtable->prepare_for_processing(m_object); + } + + ROCPROFSYS_INLINE void finalize_processing() const noexcept + { + m_vtable->finalize_processing(m_object); + } + +private: + template + static inline const vtable_t& get_vtable_for_type() noexcept + { + static const vtable_t vtable{ + +[](void* obj, const kernel_dispatch_sample& sample) noexcept { + static_cast(obj)->handle(sample); + }, + +[](void* obj, const memory_copy_sample& sample) noexcept { + static_cast(obj)->handle(sample); + }, +#if(ROCPROFILER_VERSION >= 600) + +[](void* obj, const memory_allocate_sample& sample) noexcept { + static_cast(obj)->handle(sample); + }, +#endif + +[](void* obj, const region_sample& sample) noexcept { + static_cast(obj)->handle(sample); + }, + +[](void* obj, const in_time_sample& sample) noexcept { + static_cast(obj)->handle(sample); + }, + +[](void* obj, const pmc_event_with_sample& sample) noexcept { + static_cast(obj)->handle(sample); + }, + +[](void* obj, const amd_smi_sample& sample) noexcept { + static_cast(obj)->handle(sample); + }, + +[](void* obj, const cpu_freq_sample& sample) noexcept { + static_cast(obj)->handle(sample); + }, + +[](void* obj, const backtrace_region_sample& sample) noexcept { + static_cast(obj)->handle(sample); + }, + +[](void* obj) noexcept { static_cast(obj)->prepare_for_processing(); }, + +[](void* obj) noexcept { static_cast(obj)->finalize_processing(); } + }; + return vtable; + } + + void* m_object; + const vtable_t* m_vtable; +}; + +struct sample_processor_t +{ + void clear_handlers() { m_processor_view_list.clear(); } + + template + void add_handler(T& handler) + { + m_processor_view_list.emplace_back(handler); + } + + template + ROCPROFSYS_INLINE void handle_sample(const SampleType& sample) const + { + for(const auto& view : m_processor_view_list) + view.handle(sample); + } + + ROCPROFSYS_INLINE void prepare_for_processing() const noexcept + { + for(const auto& view : m_processor_view_list) + view.prepare_for_processing(); + } + + ROCPROFSYS_INLINE void finalize_processing() const noexcept + { + for(const auto& view : m_processor_view_list) + view.finalize_processing(); + } + + ROCPROFSYS_INLINE bool is_empty() const noexcept + { + return m_processor_view_list.empty(); + } + + ROCPROFSYS_INLINE void execute_sample_processing( + type_identifier_t type_identifier, const trace_cache::cacheable_t& sample) const + { + switch(type_identifier) + { + case type_identifier_t::region: + handle_sample(static_cast(sample)); + break; + case type_identifier_t::kernel_dispatch: + handle_sample(static_cast(sample)); + break; + case type_identifier_t::memory_copy: + handle_sample(static_cast(sample)); + break; +#if ROCPROFILER_VERSION >= 600 + case type_identifier_t::memory_alloc: + handle_sample(static_cast(sample)); + break; +#endif + case type_identifier_t::in_time_sample: + handle_sample(static_cast(sample)); + break; + case type_identifier_t::pmc_event_with_sample: + handle_sample(static_cast(sample)); + break; + case type_identifier_t::amd_smi_sample: + handle_sample(static_cast(sample)); + break; + case type_identifier_t::cpu_freq_sample: + handle_sample(static_cast(sample)); + break; + case type_identifier_t::backtrace_region_sample: + handle_sample(static_cast(sample)); + break; + default: throw std::runtime_error("Unsupported sample type"); + } + } + +private: + std::vector m_processor_view_list; +}; + +} // namespace trace_cache +} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/sample_type.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/sample_type.hpp index e36b1873e3..0b0b3effb0 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/sample_type.hpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/sample_type.hpp @@ -21,6 +21,7 @@ // SOFTWARE. #pragma once +#include "core/trace_cache/cacheable.hpp" #include #include #include @@ -28,36 +29,70 @@ #include #include -#if ROCPROFSYS_USE_ROCM > 0 -# include -#endif - namespace rocprofsys { namespace trace_cache { -struct storage_parsed_type_base -{}; - -struct kernel_dispatch_sample : storage_parsed_type_base +enum class type_identifier_t : uint32_t { - // Timing fields + in_time_sample = 0x0000, + pmc_event_with_sample = 0x0001, + region = 0x0002, + kernel_dispatch = 0x0003, + memory_copy = 0x0004, + memory_alloc = 0x0005, + amd_smi_sample = 0x0006, + cpu_freq_sample = 0x0007, + backtrace_region_sample = 0x0008, + fragmented_space = 0xFFFF +}; + +struct kernel_dispatch_sample : cacheable_t +{ + static constexpr type_identifier_t type_identifier = + type_identifier_t::kernel_dispatch; + + kernel_dispatch_sample() = default; + kernel_dispatch_sample(uint64_t _start_timestamp, uint64_t _end_timestamp, + uint64_t _thread_id, uint64_t _agent_id_handle, + uint64_t _kernel_id, uint64_t _dispatch_id, + uint64_t _queue_id_handle, uint64_t _correlation_id_internal, + uint64_t _correlation_id_ancestor, + uint32_t _private_segment_size, uint32_t _group_segment_size, + uint32_t _workgroup_size_x, uint32_t _workgroup_size_y, + uint32_t _workgroup_size_z, uint32_t _grid_size_x, + uint32_t _grid_size_y, uint32_t _grid_size_z, + size_t _stream_handle) + : start_timestamp(_start_timestamp) + , end_timestamp(_end_timestamp) + , thread_id(_thread_id) + , agent_id_handle(_agent_id_handle) + , kernel_id(_kernel_id) + , dispatch_id(_dispatch_id) + , queue_id_handle(_queue_id_handle) + , correlation_id_internal(_correlation_id_internal) + , correlation_id_ancestor(_correlation_id_ancestor) + , private_segment_size(_private_segment_size) + , group_segment_size(_group_segment_size) + , workgroup_size_x(_workgroup_size_x) + , workgroup_size_y(_workgroup_size_y) + , workgroup_size_z(_workgroup_size_z) + , grid_size_x(_grid_size_x) + , grid_size_y(_grid_size_y) + , grid_size_z(_grid_size_z) + , stream_handle(_stream_handle) + {} + uint64_t start_timestamp; uint64_t end_timestamp; - - // Identification fields uint64_t thread_id; uint64_t agent_id_handle; uint64_t kernel_id; uint64_t dispatch_id; uint64_t queue_id_handle; - - // Correlation fields uint64_t correlation_id_internal; uint64_t correlation_id_ancestor; - - // Dispatch configuration uint32_t private_segment_size; uint32_t group_segment_size; uint32_t workgroup_size_x; @@ -66,69 +101,206 @@ struct kernel_dispatch_sample : storage_parsed_type_base uint32_t grid_size_x; uint32_t grid_size_y; uint32_t grid_size_z; - - // Stream handle - size_t stream_handle; + size_t stream_handle; }; -struct memory_copy_sample : storage_parsed_type_base +template <> +inline void +serialize(uint8_t* buffer, const kernel_dispatch_sample& item) { - // Timing fields + utility::store_value( + buffer, item.start_timestamp, item.end_timestamp, item.thread_id, + item.agent_id_handle, item.kernel_id, item.dispatch_id, item.queue_id_handle, + item.correlation_id_internal, item.correlation_id_ancestor, + item.private_segment_size, item.group_segment_size, item.workgroup_size_x, + item.workgroup_size_y, item.workgroup_size_z, item.grid_size_x, item.grid_size_y, + item.grid_size_z, (uint64_t) item.stream_handle); +} + +template <> +inline kernel_dispatch_sample +deserialize(uint8_t*& buffer) +{ + kernel_dispatch_sample item; + uint64_t stream_handle; + utility::parse_value(buffer, item.start_timestamp, item.end_timestamp, item.thread_id, + item.agent_id_handle, item.kernel_id, item.dispatch_id, + item.queue_id_handle, item.correlation_id_internal, + item.correlation_id_ancestor, item.private_segment_size, + item.group_segment_size, item.workgroup_size_x, + item.workgroup_size_y, item.workgroup_size_z, item.grid_size_x, + item.grid_size_y, item.grid_size_z, stream_handle); + item.stream_handle = stream_handle; + return item; +} + +template <> +inline size_t +get_size(const kernel_dispatch_sample& item) +{ + return utility::get_size( + item.start_timestamp, item.end_timestamp, item.thread_id, item.agent_id_handle, + item.kernel_id, item.dispatch_id, item.queue_id_handle, + item.correlation_id_internal, item.correlation_id_ancestor, + item.private_segment_size, item.group_segment_size, item.workgroup_size_x, + item.workgroup_size_y, item.workgroup_size_z, item.grid_size_x, item.grid_size_y, + item.grid_size_z, (uint64_t) item.stream_handle); +} + +struct memory_copy_sample : cacheable_t +{ + static constexpr type_identifier_t type_identifier = type_identifier_t::memory_copy; + + memory_copy_sample() = default; + memory_copy_sample(uint64_t _start_timestamp, uint64_t _end_timestamp, + uint64_t _thread_id, uint64_t _dst_agent_id_handle, + uint64_t _src_agent_id_handle, int32_t _kind, int32_t _operation, + uint64_t _bytes, uint64_t _correlation_id_internal, + uint64_t _correlation_id_ancestor, uint64_t _dst_address_value, + uint64_t _src_address_value, size_t _stream_handle) + : start_timestamp(_start_timestamp) + , end_timestamp(_end_timestamp) + , thread_id(_thread_id) + , dst_agent_id_handle(_dst_agent_id_handle) + , src_agent_id_handle(_src_agent_id_handle) + , kind(_kind) + , operation(_operation) + , bytes(_bytes) + , correlation_id_internal(_correlation_id_internal) + , correlation_id_ancestor(_correlation_id_ancestor) + , dst_address_value(_dst_address_value) + , src_address_value(_src_address_value) + , stream_handle(_stream_handle) + {} + uint64_t start_timestamp; uint64_t end_timestamp; - - // Identification fields uint64_t thread_id; uint64_t dst_agent_id_handle; uint64_t src_agent_id_handle; - - // Operation details int32_t kind; int32_t operation; uint64_t bytes; - - // Correlation fields uint64_t correlation_id_internal; uint64_t correlation_id_ancestor; - - // Address fields (version dependent) uint64_t dst_address_value; uint64_t src_address_value; - - // Stream handle - size_t stream_handle; + size_t stream_handle; }; -#if(ROCPROFILER_VERSION >= 600) -struct memory_allocate_sample : storage_parsed_type_base +template <> +inline void +serialize(uint8_t* buffer, const memory_copy_sample& item) { - // Timing fields + utility::store_value(buffer, item.start_timestamp, item.end_timestamp, item.thread_id, + item.dst_agent_id_handle, item.src_agent_id_handle, item.kind, + item.operation, item.bytes, item.correlation_id_internal, + item.correlation_id_ancestor, item.dst_address_value, + item.src_address_value, (uint64_t) item.stream_handle); +} + +template <> +inline memory_copy_sample +deserialize(uint8_t*& buffer) +{ + memory_copy_sample item; + uint64_t stream_handle; + utility::parse_value(buffer, item.start_timestamp, item.end_timestamp, item.thread_id, + item.dst_agent_id_handle, item.src_agent_id_handle, item.kind, + item.operation, item.bytes, item.correlation_id_internal, + item.correlation_id_ancestor, item.dst_address_value, + item.src_address_value, stream_handle); + item.stream_handle = stream_handle; + return item; +} + +template <> +inline size_t +get_size(const memory_copy_sample& item) +{ + return utility::get_size( + item.start_timestamp, item.end_timestamp, item.thread_id, + item.dst_agent_id_handle, item.src_agent_id_handle, item.kind, item.operation, + item.bytes, item.correlation_id_internal, item.correlation_id_ancestor, + item.dst_address_value, item.src_address_value, (uint64_t) item.stream_handle); +} + +struct memory_allocate_sample : cacheable_t +{ + static constexpr type_identifier_t type_identifier = type_identifier_t::memory_alloc; + + memory_allocate_sample() = default; + memory_allocate_sample(uint64_t _start_timestamp, uint64_t _end_timestamp, + uint64_t _thread_id, uint64_t _agent_id_handle, int32_t _kind, + int32_t _operation, uint64_t _allocation_size, + uint64_t _correlation_id_internal, + uint64_t _correlation_id_ancestor, uint64_t _address_value, + size_t _stream_handle) + : start_timestamp(_start_timestamp) + , end_timestamp(_end_timestamp) + , thread_id(_thread_id) + , agent_id_handle(_agent_id_handle) + , kind(_kind) + , operation(_operation) + , allocation_size(_allocation_size) + , correlation_id_internal(_correlation_id_internal) + , correlation_id_ancestor(_correlation_id_ancestor) + , address_value(_address_value) + , stream_handle(_stream_handle) + {} + uint64_t start_timestamp; uint64_t end_timestamp; - - // Identification fields uint64_t thread_id; uint64_t agent_id_handle; - - // Operation details int32_t kind; int32_t operation; uint64_t allocation_size; - - // Correlation fields uint64_t correlation_id_internal; uint64_t correlation_id_ancestor; - - // Address fields (version dependent) uint64_t address_value; - - // Stream handle - size_t stream_handle; + size_t stream_handle; }; -#endif -struct region_sample : storage_parsed_type_base +template <> +inline void +serialize(uint8_t* buffer, const memory_allocate_sample& item) { + utility::store_value(buffer, item.start_timestamp, item.end_timestamp, item.thread_id, + item.agent_id_handle, item.kind, item.operation, + item.allocation_size, item.correlation_id_internal, + item.correlation_id_ancestor, item.address_value, + (uint64_t) item.stream_handle); +} + +template <> +inline memory_allocate_sample +deserialize(uint8_t*& buffer) +{ + memory_allocate_sample item; + uint64_t stream_handle; + utility::parse_value(buffer, item.start_timestamp, item.end_timestamp, item.thread_id, + item.agent_id_handle, item.kind, item.operation, + item.allocation_size, item.correlation_id_internal, + item.correlation_id_ancestor, item.address_value, stream_handle); + item.stream_handle = stream_handle; + return item; +} + +template <> +inline size_t +get_size(const memory_allocate_sample& item) +{ + return utility::get_size( + item.start_timestamp, item.end_timestamp, item.thread_id, item.agent_id_handle, + item.kind, item.operation, item.allocation_size, item.correlation_id_internal, + item.correlation_id_ancestor, item.address_value, (uint64_t) item.stream_handle); +} + +struct region_sample : cacheable_t +{ + static constexpr type_identifier_t type_identifier = type_identifier_t::region; + region_sample() = default; region_sample(uint64_t _thread_id, std::string _name, uint64_t _correlation_id_internal, uint64_t _correlation_id_ancestor, @@ -147,20 +319,74 @@ struct region_sample : storage_parsed_type_base uint64_t thread_id; std::string name; - - uint64_t correlation_id_internal; - uint64_t correlation_id_ancestor; - - uint64_t start_timestamp; - uint64_t end_timestamp; - + uint64_t correlation_id_internal; + uint64_t correlation_id_ancestor; + uint64_t start_timestamp; + uint64_t end_timestamp; std::string call_stack; std::string args_str; std::string category; }; -struct in_time_sample : storage_parsed_type_base +template <> +inline void +serialize(uint8_t* buffer, const region_sample& item) { + utility::store_value( + buffer, item.thread_id, std::string_view(item.name), item.correlation_id_internal, + item.correlation_id_ancestor, item.start_timestamp, item.end_timestamp, + std::string_view(item.call_stack), std::string_view(item.args_str), + std::string_view(item.category)); +} + +template <> +inline region_sample +deserialize(uint8_t*& buffer) +{ + region_sample item; + std::string_view name_view, call_stack_view, args_str_view, category_view; + utility::parse_value(buffer, item.thread_id, name_view, item.correlation_id_internal, + item.correlation_id_ancestor, item.start_timestamp, + item.end_timestamp, call_stack_view, args_str_view, + category_view); + item.name = std::string(name_view); + item.call_stack = std::string(call_stack_view); + item.args_str = std::string(args_str_view); + item.category = std::string(category_view); + return item; +} + +template <> +inline size_t +get_size(const region_sample& item) +{ + return utility::get_size( + item.thread_id, std::string_view(item.name), item.correlation_id_internal, + item.correlation_id_ancestor, item.start_timestamp, item.end_timestamp, + std::string_view(item.call_stack), std::string_view(item.args_str), + std::string_view(item.category)); +} + +struct in_time_sample : cacheable_t +{ + static constexpr type_identifier_t type_identifier = + type_identifier_t::in_time_sample; + + in_time_sample() = default; + in_time_sample(std::string _track_name, size_t _timestamp_ns, + std::string _event_metadata, size_t _stack_id, size_t _parent_stack_id, + size_t _correlation_id, std::string _call_stack, + std::string _line_info) + : track_name(std::move(_track_name)) + , timestamp_ns(_timestamp_ns) + , event_metadata(std::move(_event_metadata)) + , stack_id(_stack_id) + , parent_stack_id(_parent_stack_id) + , correlation_id(_correlation_id) + , call_stack(std::move(_call_stack)) + , line_info(std::move(_line_info)) + {} + std::string track_name; size_t timestamp_ns; std::string event_metadata; @@ -171,16 +397,149 @@ struct in_time_sample : storage_parsed_type_base std::string line_info; }; +template <> +inline void +serialize(uint8_t* buffer, const in_time_sample& item) +{ + utility::store_value( + buffer, std::string_view(item.track_name), (uint64_t) item.timestamp_ns, + std::string_view(item.event_metadata), (uint64_t) item.stack_id, + (uint64_t) item.parent_stack_id, (uint64_t) item.correlation_id, + std::string_view(item.call_stack), std::string_view(item.line_info)); +} + +template <> +inline in_time_sample +deserialize(uint8_t*& buffer) +{ + in_time_sample item; + std::string_view track_name_view, event_metadata_view, call_stack_view, + line_info_view; + uint64_t timestamp_ns, stack_id, parent_stack_id, correlation_id; + utility::parse_value(buffer, track_name_view, timestamp_ns, event_metadata_view, + stack_id, parent_stack_id, correlation_id, call_stack_view, + line_info_view); + item.track_name = std::string(track_name_view); + item.timestamp_ns = timestamp_ns; + item.event_metadata = std::string(event_metadata_view); + item.stack_id = stack_id; + item.parent_stack_id = parent_stack_id; + item.correlation_id = correlation_id; + item.call_stack = std::string(call_stack_view); + item.line_info = std::string(line_info_view); + return item; +} + +template <> +inline size_t +get_size(const in_time_sample& item) +{ + return utility::get_size( + std::string_view(item.track_name), (uint64_t) item.timestamp_ns, + std::string_view(item.event_metadata), (uint64_t) item.stack_id, + (uint64_t) item.parent_stack_id, (uint64_t) item.correlation_id, + std::string_view(item.call_stack), std::string_view(item.line_info)); +} + struct pmc_event_with_sample : in_time_sample { + static constexpr type_identifier_t type_identifier = + type_identifier_t::pmc_event_with_sample; + + pmc_event_with_sample() = default; + pmc_event_with_sample(std::string _track_name, size_t _timestamp_ns, + std::string _event_metadata, size_t _stack_id, + size_t _parent_stack_id, size_t _correlation_id, + std::string _call_stack, std::string _line_info, + uint32_t _device_id, uint8_t _device_type, + std::string _pmc_info_name, double _value) + : in_time_sample(std::move(_track_name), _timestamp_ns, std::move(_event_metadata), + _stack_id, _parent_stack_id, _correlation_id, std::move(_call_stack), + std::move(_line_info)) + , device_id(_device_id) + , device_type(_device_type) + , pmc_info_name(std::move(_pmc_info_name)) + , value(_value) + {} + uint32_t device_id; uint8_t device_type; std::string pmc_info_name; double value; }; -struct amd_smi_sample : storage_parsed_type_base +template <> +inline void +serialize(uint8_t* buffer, const pmc_event_with_sample& item) { + utility::store_value( + buffer, std::string_view(item.track_name), (uint64_t) item.timestamp_ns, + std::string_view(item.event_metadata), (uint64_t) item.stack_id, + (uint64_t) item.parent_stack_id, (uint64_t) item.correlation_id, + std::string_view(item.call_stack), std::string_view(item.line_info), + item.device_id, item.device_type, std::string_view(item.pmc_info_name), + item.value); +} + +template <> +inline pmc_event_with_sample +deserialize(uint8_t*& buffer) +{ + pmc_event_with_sample item; + std::string_view track_name_view, event_metadata_view, call_stack_view, + line_info_view, pmc_info_name_view; + uint64_t timestamp_ns, stack_id, parent_stack_id, correlation_id; + utility::parse_value(buffer, track_name_view, timestamp_ns, event_metadata_view, + stack_id, parent_stack_id, correlation_id, call_stack_view, + line_info_view, item.device_id, item.device_type, + pmc_info_name_view, item.value); + item.track_name = std::string(track_name_view); + item.timestamp_ns = timestamp_ns; + item.event_metadata = std::string(event_metadata_view); + item.stack_id = stack_id; + item.parent_stack_id = parent_stack_id; + item.correlation_id = correlation_id; + item.call_stack = std::string(call_stack_view); + item.line_info = std::string(line_info_view); + item.pmc_info_name = std::string(pmc_info_name_view); + return item; +} + +template <> +inline size_t +get_size(const pmc_event_with_sample& item) +{ + return utility::get_size( + std::string_view(item.track_name), (uint64_t) item.timestamp_ns, + std::string_view(item.event_metadata), (uint64_t) item.stack_id, + (uint64_t) item.parent_stack_id, (uint64_t) item.correlation_id, + std::string_view(item.call_stack), std::string_view(item.line_info), + item.device_id, item.device_type, std::string_view(item.pmc_info_name), + item.value); +} + +struct amd_smi_sample : cacheable_t +{ + static constexpr type_identifier_t type_identifier = + type_identifier_t::amd_smi_sample; + + amd_smi_sample() = default; + amd_smi_sample(uint64_t _settings, uint32_t _device_id, size_t _timestamp, + uint32_t _gfx_activity, uint32_t _umc_activity, uint32_t _mm_activity, + uint32_t _power, int64_t _temperature, size_t _mem_usage, + std::vector _gpu_activity) + : settings(_settings) + , device_id(_device_id) + , timestamp(_timestamp) + , gfx_activity(_gfx_activity) + , umc_activity(_umc_activity) + , mm_activity(_mm_activity) + , power(_power) + , temperature(_temperature) + , mem_usage(_mem_usage) + , gpu_activity(std::move(_gpu_activity)) + {} + enum class settings_positions : uint8_t { busy = 0, @@ -205,8 +564,61 @@ struct amd_smi_sample : storage_parsed_type_base std::vector gpu_activity; }; -struct cpu_freq_sample : storage_parsed_type_base +template <> +inline void +serialize(uint8_t* buffer, const amd_smi_sample& item) { + utility::store_value(buffer, item.settings, item.device_id, (uint64_t) item.timestamp, + item.gfx_activity, item.umc_activity, item.mm_activity, + item.power, item.temperature, (uint64_t) item.mem_usage, + item.gpu_activity); +} + +template <> +inline amd_smi_sample +deserialize(uint8_t*& buffer) +{ + amd_smi_sample item; + uint64_t timestamp, mem_usage; + utility::parse_value(buffer, item.settings, item.device_id, timestamp, + item.gfx_activity, item.umc_activity, item.mm_activity, + item.power, item.temperature, mem_usage, item.gpu_activity); + item.timestamp = timestamp; + item.mem_usage = mem_usage; + return item; +} + +template <> +inline size_t +get_size(const amd_smi_sample& item) +{ + return utility::get_size(item.settings, item.device_id, (uint64_t) item.timestamp, + item.gfx_activity, item.umc_activity, item.mm_activity, + item.power, item.temperature, (uint64_t) item.mem_usage, + item.gpu_activity); +} + +struct cpu_freq_sample : cacheable_t +{ + static constexpr type_identifier_t type_identifier = + type_identifier_t::cpu_freq_sample; + + cpu_freq_sample() = default; + cpu_freq_sample(size_t _timestamp, int64_t _page_rss, int64_t _virt_mem_usage, + int64_t _peak_rss, int64_t _context_switch_count, + int64_t _page_faults, int64_t _user_mode_time, + int64_t _kernel_mode_time, std::vector _freqs) + : timestamp(_timestamp) + , page_rss(_page_rss) + , virt_mem_usage(_virt_mem_usage) + , peak_rss(_peak_rss) + , context_switch_count(_context_switch_count) + , page_faults(_page_faults) + , user_mode_time(_user_mode_time) + , kernel_mode_time(_kernel_mode_time) + , freqs(std::move(_freqs)) + {} + size_t timestamp; int64_t page_rss; int64_t virt_mem_usage; @@ -218,8 +630,44 @@ struct cpu_freq_sample : storage_parsed_type_base std::vector freqs; }; -struct backtrace_region_sample : storage_parsed_type_base +template <> +inline void +serialize(uint8_t* buffer, const cpu_freq_sample& item) { + utility::store_value(buffer, (uint64_t) item.timestamp, item.page_rss, + item.virt_mem_usage, item.peak_rss, item.context_switch_count, + item.page_faults, item.user_mode_time, item.kernel_mode_time, + item.freqs); +} + +template <> +inline cpu_freq_sample +deserialize(uint8_t*& buffer) +{ + cpu_freq_sample item; + uint64_t timestamp; + utility::parse_value(buffer, timestamp, item.page_rss, item.virt_mem_usage, + item.peak_rss, item.context_switch_count, item.page_faults, + item.user_mode_time, item.kernel_mode_time, item.freqs); + item.timestamp = timestamp; + return item; +} + +template <> +inline size_t +get_size(const cpu_freq_sample& item) +{ + return utility::get_size((uint64_t) item.timestamp, item.page_rss, + item.virt_mem_usage, item.peak_rss, + item.context_switch_count, item.page_faults, + item.user_mode_time, item.kernel_mode_time, item.freqs); +} + +struct backtrace_region_sample : cacheable_t +{ + static constexpr type_identifier_t type_identifier = + type_identifier_t::backtrace_region_sample; + backtrace_region_sample() = default; backtrace_region_sample(uint32_t _type, uint64_t _thread_id, std::string _track_name, std::string _name, uint64_t _start_timestamp, @@ -242,30 +690,54 @@ struct backtrace_region_sample : storage_parsed_type_base uint64_t thread_id; std::string track_name; std::string name; - - uint64_t start_timestamp; - uint64_t end_timestamp; - + uint64_t start_timestamp; + uint64_t end_timestamp; std::string category; std::string call_stack; std::string line_info; std::string extdata; }; -enum class entry_type : uint32_t +template <> +inline void +serialize(uint8_t* buffer, const backtrace_region_sample& item) { - in_time_sample = 0x0000, - pmc_event_with_sample = 0x0001, - region = 0x0002, - kernel_dispatch = 0x0003, - memory_copy = 0x0004, -#if(ROCPROFSYS_USE_ROCM && ROCPROFILER_VERSION >= 600) - memory_alloc = 0x0005, -#endif - amd_smi_sample = 0x0006, - cpu_freq_sample = 0x0007, - backtrace_region_sample = 0x0008, - fragmented_space = 0xFFFF -}; + utility::store_value( + buffer, item.type, item.thread_id, std::string_view(item.track_name), + std::string_view(item.name), item.start_timestamp, item.end_timestamp, + std::string_view(item.category), std::string_view(item.call_stack), + std::string_view(item.line_info), std::string_view(item.extdata)); +} + +template <> +inline backtrace_region_sample +deserialize(uint8_t*& buffer) +{ + backtrace_region_sample item; + std::string_view track_name_view, name_view, category_view, call_stack_view, + line_info_view, extdata_view; + utility::parse_value(buffer, item.type, item.thread_id, track_name_view, name_view, + item.start_timestamp, item.end_timestamp, category_view, + call_stack_view, line_info_view, extdata_view); + item.track_name = std::string(track_name_view); + item.name = std::string(name_view); + item.category = std::string(category_view); + item.call_stack = std::string(call_stack_view); + item.line_info = std::string(line_info_view); + item.extdata = std::string(extdata_view); + return item; +} + +template <> +inline size_t +get_size(const backtrace_region_sample& item) +{ + return utility::get_size( + item.type, item.thread_id, std::string_view(item.track_name), + std::string_view(item.name), item.start_timestamp, item.end_timestamp, + std::string_view(item.category), std::string_view(item.call_stack), + std::string_view(item.line_info), std::string_view(item.extdata)); +} + } // namespace trace_cache } // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.cpp deleted file mode 100644 index e58ba987a5..0000000000 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.cpp +++ /dev/null @@ -1,274 +0,0 @@ -// MIT License -// -// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include "storage_parser.hpp" -#include "debug.hpp" -#include "trace_cache/sample_type.hpp" -#include -#include -#include -#include -#include -#include -#include - -namespace rocprofsys -{ -namespace trace_cache -{ - -storage_parser::storage_parser(std::string _filename) -: m_filename(std::move(_filename)) -{} - -void -storage_parser::register_type_callback( - const entry_type& type, - const std::function& callback) -{ - m_callbacks[type].push_back(callback); -} - -void -storage_parser::register_on_finished_callback( - std::unique_ptr> callback) -{ - m_on_finished_callback = std::move(callback); -} - -void -storage_parser::consume_storage() -{ - ROCPROFSYS_VERBOSE(0, "Consuming buffered storage with filename: %s\n", - m_filename.c_str()); - - std::ifstream ifs(m_filename, std::ios::binary); - if(!ifs) - { - std::stringstream ss; - ss << "Error opening file for reading: " << m_filename << "\n"; - throw std::runtime_error(ss.str()); - } - - bool _parsing_needed = !m_callbacks.empty(); - - struct __attribute__((packed)) sample_header - { - entry_type type; - size_t sample_size; - }; - - sample_header header; - - while(!ifs.eof() && _parsing_needed) - { - ifs.read(reinterpret_cast(&header), sizeof(header)); - - if(header.sample_size == 0 || ifs.eof()) - { - continue; - } - - std::vector sample; - sample.reserve(header.sample_size); - ifs.read(reinterpret_cast(sample.data()), header.sample_size); - - if(ifs.bad()) - { - ROCPROFSYS_WARNING(1, - "Bad read while consuming buffered storage. Filename: %s. " - "Bytes read: %d\n", - m_filename.c_str(), static_cast(ifs.tellg())); - continue; - } - - switch(header.type) - { - case entry_type::kernel_dispatch: - { - kernel_dispatch_sample _kernel_dispatch_sample; - parse_data(sample.data(), _kernel_dispatch_sample.start_timestamp, - _kernel_dispatch_sample.end_timestamp, - _kernel_dispatch_sample.thread_id, - _kernel_dispatch_sample.agent_id_handle, - _kernel_dispatch_sample.kernel_id, - _kernel_dispatch_sample.dispatch_id, - _kernel_dispatch_sample.queue_id_handle, - _kernel_dispatch_sample.correlation_id_internal, - _kernel_dispatch_sample.correlation_id_ancestor, - _kernel_dispatch_sample.private_segment_size, - _kernel_dispatch_sample.group_segment_size, - _kernel_dispatch_sample.workgroup_size_x, - _kernel_dispatch_sample.workgroup_size_y, - _kernel_dispatch_sample.workgroup_size_z, - _kernel_dispatch_sample.grid_size_x, - _kernel_dispatch_sample.grid_size_y, - _kernel_dispatch_sample.grid_size_z, - _kernel_dispatch_sample.stream_handle); - - invoke_callbacks(header.type, _kernel_dispatch_sample); - break; - } - case entry_type::memory_copy: - { - memory_copy_sample _memory_copy_sample; - parse_data( - sample.data(), _memory_copy_sample.start_timestamp, - _memory_copy_sample.end_timestamp, _memory_copy_sample.thread_id, - _memory_copy_sample.dst_agent_id_handle, - _memory_copy_sample.src_agent_id_handle, _memory_copy_sample.kind, - _memory_copy_sample.operation, _memory_copy_sample.bytes, - _memory_copy_sample.correlation_id_internal, - _memory_copy_sample.correlation_id_ancestor, - _memory_copy_sample.dst_address_value, - _memory_copy_sample.src_address_value, - _memory_copy_sample.stream_handle); - invoke_callbacks(header.type, _memory_copy_sample); - break; - } -#if(ROCPROFILER_VERSION >= 600) - case entry_type::memory_alloc: - { - memory_allocate_sample _memory_allocate_sample; - parse_data(sample.data(), _memory_allocate_sample.start_timestamp, - _memory_allocate_sample.end_timestamp, - _memory_allocate_sample.thread_id, - _memory_allocate_sample.agent_id_handle, - _memory_allocate_sample.kind, - _memory_allocate_sample.operation, - _memory_allocate_sample.allocation_size, - _memory_allocate_sample.correlation_id_internal, - _memory_allocate_sample.correlation_id_ancestor, - _memory_allocate_sample.address_value, - _memory_allocate_sample.stream_handle); - - invoke_callbacks(header.type, _memory_allocate_sample); - break; - } -#endif - case entry_type::region: - { - region_sample _region_sample; - parse_data(sample.data(), _region_sample.thread_id, _region_sample.name, - _region_sample.correlation_id_internal, - _region_sample.correlation_id_ancestor, - _region_sample.start_timestamp, _region_sample.end_timestamp, - _region_sample.call_stack, _region_sample.args_str, - _region_sample.category); - - invoke_callbacks(header.type, _region_sample); - break; - } - case entry_type::in_time_sample: - { - in_time_sample _in_time_sample; - parse_data(sample.data(), _in_time_sample.track_name, - _in_time_sample.timestamp_ns, _in_time_sample.event_metadata, - _in_time_sample.stack_id, _in_time_sample.parent_stack_id, - _in_time_sample.correlation_id, _in_time_sample.call_stack, - _in_time_sample.line_info); - invoke_callbacks(header.type, _in_time_sample); - break; - } - case entry_type::pmc_event_with_sample: - { - pmc_event_with_sample _pmc_event_with_sample; - parse_data( - sample.data(), _pmc_event_with_sample.track_name, - _pmc_event_with_sample.timestamp_ns, - _pmc_event_with_sample.event_metadata, - _pmc_event_with_sample.stack_id, - _pmc_event_with_sample.parent_stack_id, - _pmc_event_with_sample.correlation_id, - _pmc_event_with_sample.call_stack, _pmc_event_with_sample.line_info, - _pmc_event_with_sample.device_id, _pmc_event_with_sample.device_type, - _pmc_event_with_sample.pmc_info_name, _pmc_event_with_sample.value); - invoke_callbacks(header.type, _pmc_event_with_sample); - break; - } - case entry_type::amd_smi_sample: - { - amd_smi_sample _amd_smi_sample; - parse_data(sample.data(), _amd_smi_sample.settings, - _amd_smi_sample.device_id, _amd_smi_sample.timestamp, - _amd_smi_sample.gfx_activity, _amd_smi_sample.umc_activity, - _amd_smi_sample.mm_activity, _amd_smi_sample.power, - _amd_smi_sample.temperature, _amd_smi_sample.mem_usage, - _amd_smi_sample.gpu_activity); - invoke_callbacks(header.type, _amd_smi_sample); - break; - } - case entry_type::cpu_freq_sample: - { - cpu_freq_sample _cpu_freq_sample; - parse_data(sample.data(), _cpu_freq_sample.timestamp, - _cpu_freq_sample.page_rss, _cpu_freq_sample.virt_mem_usage, - _cpu_freq_sample.peak_rss, - _cpu_freq_sample.context_switch_count, - _cpu_freq_sample.page_faults, _cpu_freq_sample.user_mode_time, - _cpu_freq_sample.kernel_mode_time, _cpu_freq_sample.freqs); - invoke_callbacks(header.type, _cpu_freq_sample); - break; - } - case entry_type::backtrace_region_sample: - { - backtrace_region_sample _backtrace_region_sample; - parse_data( - sample.data(), _backtrace_region_sample.type, - _backtrace_region_sample.thread_id, - _backtrace_region_sample.track_name, _backtrace_region_sample.name, - _backtrace_region_sample.start_timestamp, - _backtrace_region_sample.end_timestamp, - _backtrace_region_sample.category, - _backtrace_region_sample.call_stack, - _backtrace_region_sample.line_info, _backtrace_region_sample.extdata); - invoke_callbacks(header.type, _backtrace_region_sample); - } - default: break; - } - } - - ifs.close(); - - if(m_on_finished_callback != nullptr) - { - (*m_on_finished_callback)(); - } -} - -void -storage_parser::invoke_callbacks(entry_type type, const storage_parsed_type_base& parsed) -{ - auto _callback_list = m_callbacks.find(type); - if(_callback_list == m_callbacks.end()) - { - ROCPROFSYS_VERBOSE(1, "Callback not found for cache postprocessing\n"); - return; - } - - for(auto& cb : _callback_list->second) - { - cb(parsed); - } -} -} // namespace trace_cache -} // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.hpp index afc56c1e69..f85698ed9d 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.hpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.hpp @@ -22,73 +22,142 @@ #pragma once -#include "buffer_storage.hpp" -#include "sample_type.hpp" +#include "common/defines.h" +#include "core/debug.hpp" +#include "core/trace_cache/cacheable.hpp" +#include "core/trace_cache/type_registry.hpp" + #include +#include #include #include #include -#include -#include -#include -#include +#include +#include #include -#include -#include namespace rocprofsys { namespace trace_cache { -using postprocessing_callback = std::function; -class cache_manager; +template class storage_parser { -public: - void register_type_callback(const entry_type& type, - const postprocessing_callback& callback); + static_assert(type_traits::is_enum_class_v, + "TypeIdentifierEnum must be an enum class"); - void consume_storage(); - void register_on_finished_callback(std::unique_ptr> callback); + static_assert(sizeof...(SupportedTypes) != 0, "SupportedTypes must be non-empty"); + +public: + storage_parser(std::string _filename) + : m_filename(std::move(_filename)) + {} + + template + void load(std::shared_ptr _type_processing) + { + static_assert( + type_traits::has_execute_processing::value, + "TypeProcessing must have member function " + "execute_sample_processing(TypeIdentifierEnum, const cacheable_t&)"); + + if(_type_processing == nullptr) + { + throw std::runtime_error("TypeProcessing is nullptr"); + } + + ROCPROFSYS_DEBUG("Consuming buffered storage with filename: %s\n", + m_filename.c_str()); + + std::ifstream ifs(m_filename, std::ios::binary); + if(!ifs.good()) + { + std::stringstream ss; + ss << "Error opening file for reading: " << m_filename << "\n"; + throw std::runtime_error(ss.str()); + } + + struct __attribute__((packed)) sample_header + { + TypeIdentifierEnum type; + size_t sample_size; + }; + + sample_header header; + + std::vector sample; + sample.reserve(4096); + size_t last_capacity = sample.capacity(); + + while(!ifs.eof()) + { + if(!ifs.good()) + { + ROCPROFSYS_WARNING(0, + "Stream not in good state, stopping parse. File: %s\n", + m_filename.c_str()); + break; + } + + ifs.read(reinterpret_cast(&header), sizeof(header)); + + if(header.sample_size == 0 || ifs.eof()) + { + continue; + } + + if(ROCPROFSYS_UNLIKELY(header.sample_size > last_capacity)) + { + sample.reserve(header.sample_size); + last_capacity = sample.capacity(); + } + + sample.resize(header.sample_size); + ifs.read(reinterpret_cast(sample.data()), header.sample_size); + + if(ifs.fail()) + { + ROCPROFSYS_WARNING(1, + "Bad read while consuming buffered storage. Filename: " + "%s Bytes read: %d\n", + m_filename.c_str(), static_cast(ifs.tellg())); + continue; + } + + if(header.type == TypeIdentifierEnum::fragmented_space) + { + continue; + } + + auto* data = sample.data(); + + auto sample_value = m_registry.get_type(header.type, data); + if(sample_value.has_value()) + { + _type_processing->execute_sample_processing( + header.type, std::visit( + [](auto& arg) -> cacheable_t& { + return static_cast(arg); + }, + sample_value.value())); + } + else + { + ROCPROFSYS_DEBUG("Unsupported type detected. Skipping current sample.\n"); + continue; + } + } + + ifs.close(); + ROCPROFSYS_DEBUG("File parsing finished. Removing %s from file system.\n", + m_filename.c_str()); + std::remove(m_filename.c_str()); + } private: - friend class cache_manager; - storage_parser(std::string _filename); - - template - static void process_arg(const uint8_t*& data_pos, T& arg) - { - if constexpr(std::is_same_v) - { - arg = std::string((const char*) data_pos); - data_pos += arg.size() + 1; - } - else if constexpr(std::is_same_v>) - { - size_t vector_size = *reinterpret_cast(data_pos); - data_pos += sizeof(size_t); - arg.reserve(vector_size); - std::copy_n(data_pos, vector_size, std::back_inserter(arg)); - data_pos += vector_size; - } - else - { - arg = *reinterpret_cast(data_pos); - data_pos += sizeof(T); - } - } - - template - static void parse_data(const uint8_t* data_pos, Args&... args) - { - (process_arg(data_pos, args), ...); - } - - void invoke_callbacks(entry_type type, const storage_parsed_type_base& parsed); - - std::string m_filename; - std::map> m_callbacks; - std::unique_ptr> m_on_finished_callback{ nullptr }; + std::string m_filename; + type_registry m_registry; }; } // namespace trace_cache diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_utility.hpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/type_registry.hpp similarity index 50% rename from projects/rocprofiler-systems/source/lib/core/trace_cache/cache_utility.hpp rename to projects/rocprofiler-systems/source/lib/core/trace_cache/type_registry.hpp index b8a1bedd95..960b8abcc6 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_utility.hpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/type_registry.hpp @@ -21,37 +21,53 @@ // SOFTWARE. #pragma once -#include "library/runtime.hpp" -#include "sample_type.hpp" -#include -#include -#include -#include +#include "core/trace_cache/cache_type_traits.hpp" + +#include +#include +#include namespace rocprofsys { namespace trace_cache { -constexpr size_t buffer_size = 100 * tim::units::megabyte; -constexpr size_t flush_threshold = 80 * tim::units::megabyte; -const auto tmp_directory = std::string{ "/tmp/" }; +template +class type_registry +{ + static_assert(type_traits::is_enum_class_v, + "TypeIdentifierEnum must be an enum class"); -const auto get_buffered_storage_filename = [](const int& ppid, const int& pid) { - return std::string{ tmp_directory + "buffered_storage_" + std::to_string(ppid) + "_" + - std::to_string(pid) + ".bin" }; +public: + using variant_t = typename std::variant; + + type_registry() { (register_type(), ...); } + + std::optional get_type(TypeIdentifierEnum id, uint8_t*& data) + { + auto it = deserializers.find(id); + if(it != deserializers.end()) + { + return it->second(data); + } + return std::nullopt; + } + +private: + std::map> deserializers; + + template + inline void register_type() + { + static_assert(type_traits::has_type_identifier::value, + "Type must have type_identifier"); + static_assert(type_traits::has_deserialize::value, + "Type must have deserialize function"); + deserializers[T::type_identifier] = [](uint8_t*& data) -> variant_t { + return deserialize(data); + }; + } }; -const auto get_metadata_filepath = [](const int& ppid, const int& pid) { - return std::string{ tmp_directory + "metadata_" + std::to_string(ppid) + "_" + - std::to_string(pid) + ".json" }; -}; - -constexpr size_t minimal_fragmented_memory_size = sizeof(entry_type) + sizeof(size_t); -using buffer_array_t = std::array; - -constexpr auto ABSOLUTE = "ABS"; -constexpr auto PERCENTAGE = "%"; - } // namespace trace_cache } // namespace rocprofsys diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp index 395aee22a2..8463842531 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library.cpp @@ -45,7 +45,7 @@ #include "core/rocpd/data_processor.hpp" #include "core/timemory.hpp" #include "core/trace_cache/cache_manager.hpp" -#include "core/trace_cache/cache_utility.hpp" +#include "core/trace_cache/cacheable.hpp" #include "core/trace_cache/metadata_registry.hpp" #include "core/utility.hpp" #include "library/causal/data.hpp" @@ -578,7 +578,7 @@ rocprofsys_init_tooling_hidden(void) ROCPROFSYS_DEBUG_F("State: %s -> State::Active\n", std::to_string(get_state()).c_str()); - trace_cache::get_buffer_storage().start_flushing_thread(getpid()); + trace_cache::get_buffer_storage().start(getpid()); set_state(State::Active); // set to active as very last operation } }; @@ -798,7 +798,7 @@ rocprofsys_finalize_hidden(void) const auto _agents = get_agent_manager_instance().get_agents(); _manager.shutdown(); const auto metadata_filepath = - trace_cache::get_metadata_filepath(get_root_process_id(), getpid()); + trace_cache::utility::get_metadata_filepath(get_root_process_id(), getpid()); _manager.get_metadata_registry().save_to_file(metadata_filepath, _agents); std::quick_exit(EXIT_SUCCESS); diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/amd_smi.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/amd_smi.cpp index c4e56cb128..5a7b1edcc3 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/amd_smi.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/amd_smi.cpp @@ -28,7 +28,7 @@ #include "core/agent.hpp" #include "core/trace_cache/cache_manager.hpp" -#include "core/trace_cache/cache_utility.hpp" +#include "core/trace_cache/cacheable.hpp" #include "core/trace_cache/sample_type.hpp" #include #include @@ -693,12 +693,11 @@ data::sample(uint32_t _device_id) // Store samples if basic metrics are enabled OR if there's advanced metric data if(_basic_metrics_enabled || has_data) { - trace_cache::get_buffer_storage().store( - trace_cache::entry_type::amd_smi_sample, serialize_settings(m_dev_id), - _device_id, _timestamp, m_busy_perc.gfx_activity, - m_busy_perc.umc_activity, m_busy_perc.mm_activity, - m_power.current_socket_power, m_temp, m_mem_usage, - serialize_gpu_metrics(m_dev_id, metrics, capabilities)); + trace_cache::get_buffer_storage().store(trace_cache::amd_smi_sample{ + serialize_settings(m_dev_id), _device_id, _timestamp, + m_busy_perc.gfx_activity, m_busy_perc.umc_activity, + m_busy_perc.mm_activity, m_power.current_socket_power, m_temp, + m_mem_usage, serialize_gpu_metrics(m_dev_id, metrics, capabilities) }); if(has_data) m_gpu_metrics.push_back(metrics); } diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.cpp index 90ce90198a..222ed76354 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/backtrace_metrics.cpp @@ -27,7 +27,7 @@ #include "core/debug.hpp" #include "core/perfetto.hpp" #include "core/trace_cache/cache_manager.hpp" -#include "core/trace_cache/cache_utility.hpp" +#include "core/trace_cache/cacheable.hpp" #include "core/trace_cache/metadata_registry.hpp" #include "library/components/ensure_storage.hpp" #include "library/ptl.hpp" @@ -261,11 +261,10 @@ cache_backtrace_metrics_events(const uint32_t device_id, uint64_t timestamp_ns, const auto* line_info = ""; auto insert_event_and_sample = [&](const char* _track_name, double _value) { - trace_cache::get_buffer_storage().store( - trace_cache::entry_type::pmc_event_with_sample, _track_name, timestamp_ns, - event_metadata, stack_id, parent_stack_id, correlation_id, call_stack, - line_info, device_id, static_cast(agent_type::CPU), _track_name, - _value); + trace_cache::get_buffer_storage().store(trace_cache::pmc_event_with_sample{ + _track_name, timestamp_ns, event_metadata, stack_id, parent_stack_id, + correlation_id, call_stack, line_info, device_id, + static_cast(agent_type::CPU), _track_name, _value }); }; if constexpr(std::is_same_v) diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/category_region.hpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/category_region.hpp index 0b60729287..2a4465c372 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/category_region.hpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/category_region.hpp @@ -27,6 +27,7 @@ #include "core/state.hpp" #include "core/timemory.hpp" #include "core/trace_cache/cache_manager.hpp" +#include "core/trace_cache/sample_type.hpp" #include "library/causal/data.hpp" #include "library/runtime.hpp" #include "library/thread_info.hpp" @@ -55,9 +56,9 @@ cache_region(uint64_t thread_id, const std::string& name, uint64_t start_ts, constexpr const char* CALLSTACK = ""; constexpr const char* ARGUMENTS = ""; rocprofsys::trace_cache::get_buffer_storage().store( - rocprofsys::trace_cache::entry_type::region, thread_id, name.c_str(), - NO_CORRELATION_ID, NO_CORRELATION_ID, start_ts, end_ts, CALLSTACK, ARGUMENTS, - category.c_str()); + rocprofsys::trace_cache::region_sample{ + thread_id, name.c_str(), NO_CORRELATION_ID, NO_CORRELATION_ID, start_ts, + end_ts, CALLSTACK, ARGUMENTS, category.c_str() }); } struct entry_key diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp index 7a1d1fe162..25597cb373 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/components/comm_data.cpp @@ -151,12 +151,11 @@ cache_comm_data_events(const uint32_t device_id, int bytes) const std::string call_stack = "{}"; const std::string line_info = "{}"; - trace_cache::get_buffer_storage().store( - trace_cache::entry_type::pmc_event_with_sample, track_name.c_str(), timestamp_ns, - event_metadata.c_str(), stack_id, parent_stack_id, correlation_id, - call_stack.c_str(), line_info.c_str(), device_id, + trace_cache::get_buffer_storage().store(trace_cache::pmc_event_with_sample{ + track_name.c_str(), timestamp_ns, event_metadata.c_str(), stack_id, + parent_stack_id, correlation_id, call_stack.c_str(), line_info.c_str(), device_id, static_cast(agent_type::CPU), track_name.c_str(), - static_cast(value)); + static_cast(value) }); } } // namespace diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/cpu_freq.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/cpu_freq.cpp index 923b38fd9d..0c09375599 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/cpu_freq.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/cpu_freq.cpp @@ -260,14 +260,13 @@ sample() auto _freqs = component::cpu_freq{}.sample(); // user and kernel mode times are in microseconds - trace_cache::get_buffer_storage().store( - trace_cache::entry_type::cpu_freq_sample, _timestamp, tim::get_page_rss(), - tim::get_virt_mem(), _rcache.get_peak_rss(), + trace_cache::get_buffer_storage().store(trace_cache::cpu_freq_sample{ + _timestamp, tim::get_page_rss(), tim::get_virt_mem(), _rcache.get_peak_rss(), _rcache.get_num_priority_context_switch() + _rcache.get_num_voluntary_context_switch(), _rcache.get_num_major_page_faults() + _rcache.get_num_minor_page_faults(), _rcache.get_user_mode_time() * 1000, _rcache.get_kernel_mode_time() * 1000, - serialize_freqs(_freqs)); + serialize_freqs(_freqs) }); data.emplace_back( _timestamp, tim::get_page_rss(), tim::get_virt_mem(), _rcache.get_peak_rss(), diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/kokkosp.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/kokkosp.cpp index 5d9f2cad90..69dd361f8d 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/kokkosp.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/kokkosp.cpp @@ -191,10 +191,10 @@ cache_kokkos_event(const char* name, const char* event_type, const char* target, const char* line_info = "{}"; rocprofsys::trace_cache::get_buffer_storage().store( - rocprofsys::trace_cache::entry_type::in_time_sample, - rocprofsys::trait::name::value, timestamp_ns, - event_metadata.dump().c_str(), stack_id, parent_stack_id, correlation_id, - call_stack, line_info); + rocprofsys::trace_cache::in_time_sample{ + rocprofsys::trait::name::value, timestamp_ns, + event_metadata.dump().c_str(), stack_id, parent_stack_id, correlation_id, + call_stack, line_info }); } } // namespace diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp index ec72a14550..0f624bde3f 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk.cpp @@ -526,7 +526,6 @@ get_mem_alloc_address( } #endif -// clang-format off void cache_region(const rocprofiler_callback_tracing_record_t* record, const rocprofiler_timestamp_t start_timestamp, @@ -538,92 +537,62 @@ cache_region(const rocprofiler_callback_tracing_record_t* record, trace_cache::get_metadata_registry().get_callback_tracing_info(); auto _name = std::string{ callback_tracing_info.at(record->kind, record->operation) }; - trace_cache::get_buffer_storage().store( - trace_cache::entry_type::region, - record->thread_id, - _name.c_str(), - record->correlation_id.internal, - get_parent_stack_id(record->correlation_id), - start_timestamp, - end_timestamp, - call_stack.c_str(), - args_str.c_str(), - category.c_str()); + trace_cache::get_buffer_storage().store(trace_cache::region_sample{ + record->thread_id, _name.c_str(), record->correlation_id.internal, + get_parent_stack_id(record->correlation_id), start_timestamp, end_timestamp, + call_stack.c_str(), args_str.c_str(), category.c_str() }); } void -cache_kernel_dispatch(rocprofiler_buffer_tracing_kernel_dispatch_record_t* record, uint64_t stream_handle) +cache_kernel_dispatch(rocprofiler_buffer_tracing_kernel_dispatch_record_t* record, + uint64_t stream_handle) { - auto queue_handle = record->dispatch_info.queue_id.handle; + auto queue_handle = record->dispatch_info.queue_id.handle; trace_cache::get_metadata_registry().add_queue(queue_handle); trace_cache::get_metadata_registry().add_stream(stream_handle); - trace_cache::get_buffer_storage().store( - trace_cache::entry_type::kernel_dispatch, - record->start_timestamp, - record->end_timestamp, - record->thread_id, - record->dispatch_info.agent_id.handle, - record->dispatch_info.kernel_id, - record->dispatch_info.dispatch_id, - record->dispatch_info.queue_id.handle, - record->correlation_id.internal, - get_parent_stack_id(record->correlation_id), + trace_cache::get_buffer_storage().store(trace_cache::kernel_dispatch_sample{ + record->start_timestamp, record->end_timestamp, record->thread_id, + record->dispatch_info.agent_id.handle, record->dispatch_info.kernel_id, + record->dispatch_info.dispatch_id, record->dispatch_info.queue_id.handle, + record->correlation_id.internal, get_parent_stack_id(record->correlation_id), record->dispatch_info.private_segment_size, - record->dispatch_info.group_segment_size, - record->dispatch_info.workgroup_size.x, - record->dispatch_info.workgroup_size.y, - record->dispatch_info.workgroup_size.z, - record->dispatch_info.grid_size.x, - record->dispatch_info.grid_size.y, - record->dispatch_info.grid_size.z, - stream_handle); - + record->dispatch_info.group_segment_size, record->dispatch_info.workgroup_size.x, + record->dispatch_info.workgroup_size.y, record->dispatch_info.workgroup_size.z, + record->dispatch_info.grid_size.x, record->dispatch_info.grid_size.y, + record->dispatch_info.grid_size.z, stream_handle }); } void -cache_memory_copy(rocprofiler_buffer_tracing_memory_copy_record_t* record, uint64_t stream_handle) +cache_memory_copy(rocprofiler_buffer_tracing_memory_copy_record_t* record, + uint64_t stream_handle) { trace_cache::get_metadata_registry().add_stream(stream_handle); - trace_cache::get_buffer_storage().store( - trace_cache::entry_type::memory_copy, - record->start_timestamp, - record->end_timestamp, - record->thread_id, - record->dst_agent_id.handle, - record->src_agent_id.handle, - static_cast(record->kind), - static_cast(record->operation), - record->bytes, - record->correlation_id.internal, - get_parent_stack_id(record->correlation_id), - get_mem_copy_dst_address(*record), - get_mem_copy_src_address(*record), - stream_handle); + trace_cache::get_buffer_storage().store(trace_cache::memory_copy_sample{ + + record->start_timestamp, record->end_timestamp, record->thread_id, + record->dst_agent_id.handle, record->src_agent_id.handle, + static_cast(record->kind), static_cast(record->operation), + record->bytes, record->correlation_id.internal, + get_parent_stack_id(record->correlation_id), get_mem_copy_dst_address(*record), + get_mem_copy_src_address(*record), stream_handle }); } -#if (ROCPROFILER_VERSION >= 600) +#if(ROCPROFILER_VERSION >= 600) void -cache_memory_allocation(rocprofiler_buffer_tracing_memory_allocation_record_t* record, uint64_t stream_handle) +cache_memory_allocation(rocprofiler_buffer_tracing_memory_allocation_record_t* record, + uint64_t stream_handle) { trace_cache::get_metadata_registry().add_stream(stream_handle); - trace_cache::get_buffer_storage().store( - trace_cache::entry_type::memory_alloc, - record->start_timestamp, - record->end_timestamp, - record->thread_id, - record->agent_id.handle, - static_cast(record->kind), - static_cast(record->operation), - record->allocation_size, - record->correlation_id.internal, - get_parent_stack_id(record->correlation_id), - get_mem_alloc_address(*record), - stream_handle); + trace_cache::get_buffer_storage().store(trace_cache::memory_allocate_sample{ + record->start_timestamp, record->end_timestamp, record->thread_id, + record->agent_id.handle, static_cast(record->kind), + static_cast(record->operation), record->allocation_size, + record->correlation_id.internal, get_parent_stack_id(record->correlation_id), + get_mem_alloc_address(*record), stream_handle }); } #endif -// clang-format on std::string get_args_string(const function_args_t& args) diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.cpp index 83ca3b305c..09e62675c0 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/rocprofiler-sdk/counters.cpp @@ -120,12 +120,11 @@ counter_event::operator()(const client_data* tool_data, ::perfetto::CounterTrack auto agent = get_agent_manager_instance().get_agent_by_handle(agent_handle); - trace_cache::get_buffer_storage().store( - trace_cache::entry_type::pmc_event_with_sample, track_name.c_str(), - _timing.start, event_metadata.c_str(), stack_id, parent_stack_id, - correlation_id, call_stack.c_str(), line_info.c_str(), + trace_cache::get_buffer_storage().store(trace_cache::pmc_event_with_sample{ + track_name.c_str(), _timing.start, event_metadata.c_str(), stack_id, + parent_stack_id, correlation_id, call_stack.c_str(), line_info.c_str(), static_cast(agent.device_id), static_cast(agent.type), - track_name.c_str(), value); + track_name.c_str(), static_cast(value) }); } } diff --git a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp index 4c08236b96..1c1be80192 100644 --- a/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp +++ b/projects/rocprofiler-systems/source/lib/rocprof-sys/library/sampling.cpp @@ -315,13 +315,12 @@ cache_sampling_data(int64_t _tid, const std::vector& _timer auto _call_stack = generate_call_stack_json(iitr); auto _line_info = generate_line_info_json(iitr); - trace_cache::get_buffer_storage().store( - trace_cache::entry_type::backtrace_region_sample, + trace_cache::get_buffer_storage().store(trace_cache::backtrace_region_sample{ static_cast(ROCPROFSYS_CATEGORY_TIMER_SAMPLING), static_cast(_thread_info->index_data->system_value), _track_name.c_str(), _name.c_str(), itr.m_beg, itr.m_end, trait::name::value, _call_stack.c_str(), - _line_info.c_str(), "{}"); + _line_info.c_str(), "{}" }); } } @@ -348,13 +347,12 @@ cache_sampling_data(int64_t _tid, const std::vector& _timer auto _call_stack = generate_call_stack_json(iitr); auto _line_info = generate_line_info_json(iitr); - trace_cache::get_buffer_storage().store( - trace_cache::entry_type::backtrace_region_sample, + trace_cache::get_buffer_storage().store(trace_cache::backtrace_region_sample{ static_cast(ROCPROFSYS_CATEGORY_OVERFLOW_SAMPLING), static_cast(_thread_info->index_data->system_value), _track_name.c_str(), _name.c_str(), itr.m_beg, itr.m_end, trait::name::value, _call_stack.c_str(), - _line_info.c_str(), "{}"); + _line_info.c_str(), "{}" }); } } }