[rocprof-sys] Refactor trace_cache architecture with improved type erasure and processing pipeline (#1710)
- Redesigned buffer_storage with a flush_worker pattern for better thread management and resource cleanup - Introduced type-safe abstractions through new components: cacheable.hpp, cache_type_traits.hpp, sample_processor.hpp, and type_registry.hpp - Optimized type erasure implementation in sample processor to reduce runtime overhead - Renamed rocpd_post_processing to rocpd_processor and restructured the processing pipeline - Removed storage_parser.cpp and integrated functionality into header-based template implementation - Enhanced cache_manager with improved processing workflow and better separation of concerns
This commit is contained in:
committad av
GitHub
förälder
2f6fb89c51
incheckning
4d670099fa
@@ -22,23 +22,27 @@
|
||||
|
||||
set(trace_cache_sources
|
||||
${CMAKE_CURRENT_LIST_DIR}/cache_manager.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/storage_parser.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/buffer_storage.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/metadata_registry.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rocpd_post_processing.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rocpd_processor.cpp
|
||||
)
|
||||
|
||||
set(trace_cache_headers
|
||||
${CMAKE_CURRENT_LIST_DIR}/cache_manager.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/storage_parser.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/buffer_storage.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/cache_utility.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/cacheable.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/type_registry.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/cache_type_traits.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/metadata_registry.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rocpd_post_processing.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/rocpd_processor.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/sample_processor.hpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/sample_type.hpp
|
||||
)
|
||||
|
||||
target_sources(
|
||||
target_sources(rocprofiler-systems-core-library PRIVATE ${trace_cache_sources})
|
||||
|
||||
target_include_directories(
|
||||
rocprofiler-systems-core-library
|
||||
PRIVATE ${trace_cache_sources} ${trace_cache_headers}
|
||||
PUBLIC ${CMAKE_CURRENT_LIST_DIR}
|
||||
)
|
||||
|
||||
@@ -40,162 +40,89 @@ namespace rocprofsys
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
constexpr auto CACHE_FILE_FLUSH_TIMEOUT = 10ms;
|
||||
constexpr auto NUM_OF_THREADS = 1;
|
||||
} // namespace
|
||||
flush_worker_t::flush_worker_t(worker_function_t worker_function,
|
||||
worker_synchronization_ptr_t worker_synchronization_ptr,
|
||||
std::string filepath)
|
||||
|
||||
buffer_storage::buffer_storage()
|
||||
{
|
||||
ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
m_thread_pool = std::make_unique<PTL::ThreadPool>(NUM_OF_THREADS);
|
||||
m_thread_pool->initialize_threadpool(NUM_OF_THREADS);
|
||||
|
||||
m_task_group = std::make_unique<PTL::TaskGroup<void>>(m_thread_pool.get());
|
||||
}
|
||||
: m_worker_function(std::move(worker_function))
|
||||
, m_worker_synchronization(std::move(worker_synchronization_ptr))
|
||||
, m_filepath(std::move(filepath))
|
||||
{}
|
||||
|
||||
void
|
||||
buffer_storage::start_flushing_thread(pid_t _pid)
|
||||
flush_worker_t::start(const pid_t& current_pid)
|
||||
{
|
||||
if(m_worker_synchronization->is_running)
|
||||
{
|
||||
std::stringstream _ss;
|
||||
_ss << "Flush worker is already running";
|
||||
throw std::runtime_error(_ss.str());
|
||||
}
|
||||
|
||||
m_ofs = std::ofstream{ m_filepath, std::ios::binary | std::ios::out };
|
||||
|
||||
if(!m_ofs.good())
|
||||
{
|
||||
std::stringstream _ss;
|
||||
_ss << "Error opening file for writing: " << m_filepath;
|
||||
throw std::runtime_error(_ss.str());
|
||||
}
|
||||
|
||||
m_worker_synchronization->origin_pid = current_pid;
|
||||
m_worker_synchronization->is_running = true;
|
||||
|
||||
ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
m_task_group->exec([this, _pid]() {
|
||||
auto filepath = get_buffered_storage_filename(get_root_process_id(), getpid());
|
||||
std::ofstream _ofs(filepath, std::ios::binary | std::ios::out);
|
||||
|
||||
if(!_ofs)
|
||||
{
|
||||
std::stringstream _ss;
|
||||
_ss << "Error opening file for writing: " << filepath;
|
||||
throw std::runtime_error(_ss.str());
|
||||
}
|
||||
|
||||
auto execute_flush = [&](std::ofstream& ofs, bool force = false) {
|
||||
size_t _head, _tail;
|
||||
{
|
||||
std::lock_guard guard{ m_mutex };
|
||||
_head = m_head;
|
||||
_tail = m_tail;
|
||||
|
||||
if(_head == _tail)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
auto used_space =
|
||||
m_head > m_tail ? (m_head - m_tail) : (buffer_size - m_tail + m_head);
|
||||
if(!force && used_space < flush_threshold)
|
||||
{
|
||||
return;
|
||||
}
|
||||
m_tail = m_head;
|
||||
}
|
||||
|
||||
if(_head > _tail)
|
||||
{
|
||||
ofs.write(reinterpret_cast<const char*>(m_buffer->data() + _tail),
|
||||
_head - _tail);
|
||||
}
|
||||
else
|
||||
{
|
||||
ofs.write(reinterpret_cast<const char*>(m_buffer->data() + _tail),
|
||||
buffer_size - _tail);
|
||||
ofs.write(reinterpret_cast<const char*>(m_buffer->data()), _head);
|
||||
}
|
||||
};
|
||||
|
||||
m_created_process = _pid;
|
||||
m_flushing_thread = std::make_unique<std::thread>([&]() {
|
||||
std::mutex _shutdown_condition_mutex;
|
||||
while(m_running)
|
||||
while(m_worker_synchronization->is_running)
|
||||
{
|
||||
execute_flush(_ofs);
|
||||
m_worker_function(m_ofs, false);
|
||||
std::unique_lock _lock{ _shutdown_condition_mutex };
|
||||
m_shutdown_condition.wait_for(
|
||||
_lock, std::chrono::milliseconds(CACHE_FILE_FLUSH_TIMEOUT),
|
||||
[&]() { return !m_running; });
|
||||
m_worker_synchronization->is_running_condition.wait_for(
|
||||
_lock, CACHE_FILE_FLUSH_TIMEOUT,
|
||||
[&]() { return !m_worker_synchronization->is_running; });
|
||||
}
|
||||
|
||||
execute_flush(_ofs, true);
|
||||
_ofs.close();
|
||||
m_exit_finished = true;
|
||||
m_exit_condition.notify_one();
|
||||
m_worker_function(m_ofs, true);
|
||||
m_ofs.close();
|
||||
m_worker_synchronization->exit_finished = true;
|
||||
m_worker_synchronization->exit_finished_condition.notify_one();
|
||||
});
|
||||
}
|
||||
|
||||
buffer_storage::~buffer_storage()
|
||||
{
|
||||
shutdown();
|
||||
if(m_thread_pool && m_thread_pool->is_alive())
|
||||
{
|
||||
m_thread_pool->destroy_threadpool();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
buffer_storage::shutdown()
|
||||
flush_worker_t::stop(const pid_t& current_pid)
|
||||
{
|
||||
if(!m_running)
|
||||
const bool flushing_thread_exist = m_flushing_thread != nullptr;
|
||||
const bool worker_is_running =
|
||||
m_worker_synchronization != nullptr && m_worker_synchronization->is_running;
|
||||
|
||||
if(flushing_thread_exist && worker_is_running)
|
||||
{
|
||||
return;
|
||||
}
|
||||
ROCPROFSYS_DEBUG("Buffer storage shutting down..\n");
|
||||
m_worker_synchronization->is_running = false;
|
||||
m_worker_synchronization->is_running_condition.notify_all();
|
||||
|
||||
ROCPROFSYS_DEBUG("Buffer storage shutting down..");
|
||||
m_running = false;
|
||||
m_shutdown_condition.notify_all();
|
||||
|
||||
if(m_created_process != getpid())
|
||||
{
|
||||
ROCPROFSYS_DEBUG(
|
||||
"Buffer storage is not created in same process as shutting down..");
|
||||
return;
|
||||
}
|
||||
|
||||
std::mutex _exit_mutex;
|
||||
std::unique_lock _exit_lock{ _exit_mutex };
|
||||
m_exit_condition.wait(_exit_lock, [&]() { return m_exit_finished; });
|
||||
|
||||
if(m_thread_pool && m_thread_pool->is_alive())
|
||||
{
|
||||
m_thread_pool->destroy_threadpool();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
buffer_storage::fragment_memory()
|
||||
{
|
||||
auto* _data = m_buffer->data();
|
||||
memset(_data + m_head, 0xFFFF, buffer_size - m_head);
|
||||
*reinterpret_cast<entry_type*>(_data + m_head) = entry_type::fragmented_space;
|
||||
|
||||
size_t remaining_bytes = buffer_size - m_head - minimal_fragmented_memory_size;
|
||||
*reinterpret_cast<size_t*>(_data + m_head + sizeof(entry_type)) = remaining_bytes;
|
||||
m_head = 0;
|
||||
}
|
||||
|
||||
uint8_t*
|
||||
buffer_storage::reserve_memory_space(size_t len)
|
||||
{
|
||||
size_t _size;
|
||||
{
|
||||
std::lock_guard scope{ m_mutex };
|
||||
|
||||
if((m_head + len + minimal_fragmented_memory_size) > buffer_size)
|
||||
const bool thread_is_created_in_this_process =
|
||||
current_pid == m_worker_synchronization->origin_pid;
|
||||
if(!thread_is_created_in_this_process)
|
||||
{
|
||||
fragment_memory();
|
||||
ROCPROFSYS_DEBUG(
|
||||
"Buffer storage is not created in same process as shutting down..\n");
|
||||
return;
|
||||
}
|
||||
|
||||
std::mutex _exit_mutex;
|
||||
std::unique_lock _exit_lock{ _exit_mutex };
|
||||
m_worker_synchronization->exit_finished_condition.wait(
|
||||
_exit_lock, [&]() { return m_worker_synchronization->exit_finished.load(); });
|
||||
|
||||
if(m_flushing_thread->joinable())
|
||||
{
|
||||
m_flushing_thread->join();
|
||||
m_flushing_thread.reset();
|
||||
}
|
||||
_size = m_head;
|
||||
m_head = m_head + len;
|
||||
}
|
||||
|
||||
auto* _result = m_buffer->data() + _size;
|
||||
memset(_result, 0, len);
|
||||
return _result;
|
||||
}
|
||||
|
||||
bool
|
||||
buffer_storage::is_running() const
|
||||
{
|
||||
return m_running;
|
||||
}
|
||||
|
||||
} // namespace trace_cache
|
||||
|
||||
@@ -22,21 +22,21 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "PTL/TaskGroup.hh"
|
||||
#include "PTL/ThreadPool.hh"
|
||||
#include "cache_utility.hpp"
|
||||
#include "sample_type.hpp"
|
||||
#include <PTL/PTL.hh>
|
||||
#include "core/trace_cache/cacheable.hpp"
|
||||
|
||||
#include "common/defines.h"
|
||||
#include "core/debug.hpp"
|
||||
|
||||
#include <cassert>
|
||||
#include <condition_variable>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <type_traits>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
namespace rocprofsys
|
||||
@@ -44,134 +44,226 @@ namespace rocprofsys
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
class cache_manager;
|
||||
using ofs_t = std::basic_ostream<char>;
|
||||
using worker_function_t = std::function<void(ofs_t& ofs, bool force)>;
|
||||
|
||||
struct worker_synchronization_t
|
||||
{
|
||||
std::condition_variable is_running_condition;
|
||||
std::atomic_bool is_running{ false };
|
||||
|
||||
std::condition_variable exit_finished_condition;
|
||||
std::atomic_bool exit_finished{ false };
|
||||
|
||||
pid_t origin_pid;
|
||||
};
|
||||
using worker_synchronization_ptr_t = std::shared_ptr<worker_synchronization_t>;
|
||||
|
||||
struct flush_worker_t
|
||||
{
|
||||
explicit flush_worker_t(worker_function_t worker_function,
|
||||
worker_synchronization_ptr_t worker_synchronization_ptr,
|
||||
std::string filepath);
|
||||
|
||||
void start(const pid_t& current_pid);
|
||||
|
||||
void stop(const pid_t& current_pid);
|
||||
|
||||
private:
|
||||
worker_function_t m_worker_function;
|
||||
worker_synchronization_ptr_t m_worker_synchronization;
|
||||
std::string m_filepath;
|
||||
std::ofstream m_ofs;
|
||||
std::unique_ptr<std::thread> m_flushing_thread;
|
||||
};
|
||||
|
||||
struct flush_worker_factory_t
|
||||
{
|
||||
using worker_t = flush_worker_t;
|
||||
|
||||
flush_worker_factory_t() = delete;
|
||||
flush_worker_factory_t(flush_worker_factory_t&) = delete;
|
||||
flush_worker_factory_t& operator=(flush_worker_factory_t&) = delete;
|
||||
flush_worker_factory_t(flush_worker_factory_t&&) = delete;
|
||||
flush_worker_factory_t& operator=(flush_worker_factory_t&&) = delete;
|
||||
|
||||
static std::shared_ptr<worker_t> get_worker(
|
||||
worker_function_t worker_function,
|
||||
const worker_synchronization_ptr_t& worker_synchronization_ptr,
|
||||
std::string filepath)
|
||||
{
|
||||
return std::make_shared<worker_t>(worker_function, worker_synchronization_ptr,
|
||||
std::move(filepath));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename WorkerFactory, typename TypeIdentifierEnum>
|
||||
class buffer_storage
|
||||
{
|
||||
public:
|
||||
static buffer_storage& get_instance();
|
||||
static_assert(type_traits::is_enum_class_v<TypeIdentifierEnum>,
|
||||
"TypeIdentifierEnum must be an enum class");
|
||||
|
||||
template <typename... T>
|
||||
void store(entry_type type, T&&... values)
|
||||
public:
|
||||
explicit buffer_storage(std::string filepath)
|
||||
: m_worker{ std::move(
|
||||
WorkerFactory::get_worker([this](ofs_t& ofs, bool force) { flush(ofs, force); },
|
||||
m_worker_synchronization, std::move(filepath))) }
|
||||
{}
|
||||
|
||||
~buffer_storage() { shutdown(); }
|
||||
|
||||
void start(const pid_t& current_pid = getpid())
|
||||
{
|
||||
if(!is_running())
|
||||
if(m_worker == nullptr)
|
||||
{
|
||||
throw std::runtime_error(
|
||||
"Trying to use buffered storage while it is not running");
|
||||
"Worker is null - unable to start buffered storage.");
|
||||
}
|
||||
|
||||
if(is_running())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
constexpr bool is_supported_type = (supported_types::is_supported<T> && ...);
|
||||
static_assert(is_supported_type,
|
||||
"Supported types are const char*, char*, "
|
||||
"unsigned long, unsigned int, long, unsigned "
|
||||
"char, std::vector<unsigned char>, double, and int.");
|
||||
|
||||
auto arg_size = get_size(values...);
|
||||
auto total_size = arg_size + sizeof(type) + sizeof(size_t);
|
||||
auto* reserved_memory = reserve_memory_space(total_size);
|
||||
size_t position = 0;
|
||||
|
||||
auto store_value = [&](const auto& val) {
|
||||
using Type = decltype(val);
|
||||
size_t len = 0;
|
||||
auto* dest = reserved_memory + position;
|
||||
if constexpr(std::is_same_v<std::decay_t<Type>, const char*>)
|
||||
{
|
||||
len = strlen(val) + 1;
|
||||
std::memcpy(dest, val, len);
|
||||
}
|
||||
else if constexpr(std::is_same_v<std::decay_t<Type>, std::vector<uint8_t>>)
|
||||
{
|
||||
size_t elem_count = val.size();
|
||||
len = elem_count + sizeof(size_t);
|
||||
std::memcpy(dest, &elem_count, sizeof(size_t));
|
||||
std::memcpy(dest + sizeof(size_t), val.data(), val.size());
|
||||
}
|
||||
else
|
||||
{
|
||||
using ClearType = std::decay_t<decltype(val)>;
|
||||
len = sizeof(ClearType);
|
||||
*reinterpret_cast<ClearType*>(dest) = val;
|
||||
}
|
||||
position += len;
|
||||
};
|
||||
|
||||
store_value(type);
|
||||
store_value(arg_size);
|
||||
|
||||
(store_value(values), ...);
|
||||
m_worker->start(current_pid);
|
||||
}
|
||||
|
||||
void start_flushing_thread(pid_t pid);
|
||||
~buffer_storage();
|
||||
void shutdown(const pid_t& current_pid = getpid())
|
||||
{
|
||||
if(m_worker == nullptr)
|
||||
{
|
||||
throw std::runtime_error(
|
||||
"Worker is null - unable to shutdown buffered storage.");
|
||||
return;
|
||||
}
|
||||
|
||||
if(!is_running())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
m_worker->stop(current_pid);
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
auto store(const Type& value)
|
||||
{
|
||||
if(m_worker == nullptr || !is_running())
|
||||
{
|
||||
throw std::runtime_error(
|
||||
"Trying to use buffered storage while it is not running");
|
||||
}
|
||||
|
||||
type_traits::check_type<Type, TypeIdentifierEnum>();
|
||||
|
||||
using TypeIdentifierEnumUderlayingType =
|
||||
std::underlying_type_t<TypeIdentifierEnum>;
|
||||
|
||||
size_t sample_size = get_size(value);
|
||||
size_t bytes_to_reserve = header_size<TypeIdentifierEnum> + sample_size;
|
||||
auto* buf = reserve_memory_space(bytes_to_reserve);
|
||||
size_t position = 0;
|
||||
auto type_identifier_value =
|
||||
static_cast<TypeIdentifierEnumUderlayingType>(Type::type_identifier);
|
||||
|
||||
utility::store_value(type_identifier_value, buf, position);
|
||||
utility::store_value(sample_size, buf, position);
|
||||
serialize(buf + position, value);
|
||||
}
|
||||
|
||||
ROCPROFSYS_INLINE bool is_running() const
|
||||
{
|
||||
return m_worker_synchronization != nullptr &&
|
||||
m_worker_synchronization->is_running;
|
||||
}
|
||||
|
||||
private:
|
||||
friend class cache_manager;
|
||||
buffer_storage();
|
||||
void shutdown();
|
||||
bool is_running() const;
|
||||
void fragment_memory();
|
||||
uint8_t* reserve_memory_space(size_t len);
|
||||
|
||||
template <typename... Types>
|
||||
struct typelist
|
||||
void flush(ofs_t& ofs, bool force)
|
||||
{
|
||||
template <typename T>
|
||||
constexpr static bool is_supported =
|
||||
(std::is_same_v<std::decay_t<T>, Types> || ...);
|
||||
};
|
||||
|
||||
using supported_types = typelist<const char*, char*, uint64_t, int32_t, uint32_t,
|
||||
std::vector<uint8_t>, uint8_t, int64_t, double>;
|
||||
|
||||
template <typename T>
|
||||
static constexpr bool is_string_literal_v =
|
||||
std::is_same_v<std::decay_t<T>, const char*> ||
|
||||
std::is_same_v<std::decay_t<T>, char*>;
|
||||
|
||||
template <typename T>
|
||||
constexpr size_t get_size_impl(T&& val)
|
||||
{
|
||||
if constexpr(is_string_literal_v<T>)
|
||||
size_t _head, _tail;
|
||||
{
|
||||
size_t size = 0;
|
||||
while(val[size] != '\0')
|
||||
std::lock_guard guard{ m_mutex };
|
||||
_head = m_head;
|
||||
_tail = m_tail;
|
||||
|
||||
if(_head == _tail)
|
||||
{
|
||||
size++;
|
||||
return;
|
||||
}
|
||||
return ++size;
|
||||
|
||||
auto used_space =
|
||||
m_head > m_tail ? (m_head - m_tail) : (buffer_size - m_tail + m_head);
|
||||
if(!force && used_space < flush_threshold)
|
||||
{
|
||||
return;
|
||||
}
|
||||
m_tail = m_head;
|
||||
}
|
||||
else if constexpr(std::is_same_v<std::decay_t<T>, std::vector<uint8_t>>)
|
||||
|
||||
if(_head > _tail)
|
||||
{
|
||||
return val.size() + sizeof(size_t);
|
||||
ofs.write(reinterpret_cast<const char*>(m_buffer->data() + _tail),
|
||||
_head - _tail);
|
||||
}
|
||||
else
|
||||
{
|
||||
return sizeof(T);
|
||||
ofs.write(reinterpret_cast<const char*>(m_buffer->data() + _tail),
|
||||
buffer_size - _tail);
|
||||
ofs.write(reinterpret_cast<const char*>(m_buffer->data()), _head);
|
||||
}
|
||||
if(ofs.fail())
|
||||
{
|
||||
ROCPROFSYS_WARNING(1, "Error flushing buffered storage to file for pid: %d",
|
||||
m_worker_synchronization->origin_pid);
|
||||
ROCPROFSYS_CI_THROW(true,
|
||||
"Error flushing buffered storage to file for pid: %d",
|
||||
m_worker_synchronization->origin_pid);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... T>
|
||||
constexpr size_t get_size(T&&... val)
|
||||
void fragment_memory()
|
||||
{
|
||||
auto total_size = 0;
|
||||
((total_size += get_size_impl(val)), ...);
|
||||
return total_size;
|
||||
auto* _data = m_buffer->data();
|
||||
memset(_data + m_head, std::numeric_limits<uint8_t>::max(), buffer_size - m_head);
|
||||
*reinterpret_cast<TypeIdentifierEnum*>(_data + m_head) =
|
||||
TypeIdentifierEnum::fragmented_space;
|
||||
|
||||
size_t remaining_bytes = buffer_size - m_head - header_size<TypeIdentifierEnum>;
|
||||
*reinterpret_cast<size_t*>(_data + m_head + sizeof(TypeIdentifierEnum)) =
|
||||
remaining_bytes;
|
||||
m_head = 0;
|
||||
}
|
||||
|
||||
ROCPROFSYS_INLINE uint8_t* reserve_memory_space(const size_t& number_of_bytes)
|
||||
{
|
||||
size_t _size;
|
||||
{
|
||||
std::lock_guard scope{ m_mutex };
|
||||
|
||||
if(__builtin_expect((m_head + number_of_bytes +
|
||||
header_size<TypeIdentifierEnum>) > buffer_size,
|
||||
0))
|
||||
{
|
||||
fragment_memory();
|
||||
}
|
||||
_size = m_head;
|
||||
m_head += number_of_bytes;
|
||||
}
|
||||
|
||||
return m_buffer->data() + _size;
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex m_mutex;
|
||||
std::condition_variable m_exit_condition;
|
||||
bool m_exit_finished{ false };
|
||||
bool m_running{ true };
|
||||
std::condition_variable m_shutdown_condition;
|
||||
worker_synchronization_ptr_t m_worker_synchronization{
|
||||
std::make_shared<worker_synchronization_t>()
|
||||
};
|
||||
|
||||
std::unique_ptr<PTL::ThreadPool> m_thread_pool;
|
||||
std::unique_ptr<PTL::TaskGroup<void>> m_task_group;
|
||||
size_t m_head{ 0 };
|
||||
size_t m_tail{ 0 };
|
||||
std::unique_ptr<buffer_array_t> m_buffer{ std::make_unique<buffer_array_t>() };
|
||||
pid_t m_created_process;
|
||||
std::shared_ptr<typename WorkerFactory::worker_t> m_worker;
|
||||
|
||||
std::mutex m_mutex;
|
||||
size_t m_head{ 0 };
|
||||
size_t m_tail{ 0 };
|
||||
std::unique_ptr<buffer_array_t> m_buffer{ std::make_unique<buffer_array_t>() };
|
||||
};
|
||||
|
||||
} // namespace trace_cache
|
||||
|
||||
@@ -21,14 +21,17 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#include "cache_manager.hpp"
|
||||
#include "agent_manager.hpp"
|
||||
|
||||
#include "core/trace_cache/metadata_registry.hpp"
|
||||
#include "core/trace_cache/rocpd_processor.hpp"
|
||||
#include "core/trace_cache/sample_processor.hpp"
|
||||
|
||||
#include "core/agent_manager.hpp"
|
||||
#include "core/config.hpp"
|
||||
#include "core/trace_cache/storage_parser.hpp"
|
||||
#include "debug.hpp"
|
||||
#include "core/debug.hpp"
|
||||
|
||||
#include "library/runtime.hpp"
|
||||
#include "trace_cache/cache_utility.hpp"
|
||||
#include "trace_cache/metadata_registry.hpp"
|
||||
#include "trace_cache/rocpd_post_processing.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
@@ -37,8 +40,81 @@ namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
namespace
|
||||
|
||||
namespace data
|
||||
{
|
||||
struct cache_files_t
|
||||
{
|
||||
std::string buff_storage;
|
||||
std::string metadata;
|
||||
inline bool empty() const { return buff_storage.empty() || metadata.empty(); }
|
||||
};
|
||||
|
||||
struct enabled_formats_t
|
||||
{
|
||||
bool rocpd = get_use_rocpd();
|
||||
|
||||
void print() const
|
||||
{
|
||||
constexpr std::pair<const char*, bool enabled_formats_t::*> formats[] = {
|
||||
{ "rocpd", &enabled_formats_t::rocpd },
|
||||
};
|
||||
|
||||
bool any_enabled = false;
|
||||
for(const auto& fmt : formats)
|
||||
any_enabled |= this->*(fmt.second);
|
||||
|
||||
if(!any_enabled) return;
|
||||
|
||||
bool first = true;
|
||||
std::stringstream ss;
|
||||
|
||||
for(const auto& fmt : formats)
|
||||
{
|
||||
if(this->*(fmt.second))
|
||||
{
|
||||
if(!first && sizeof(formats) > 1) ss << ", ";
|
||||
ss << fmt.first;
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
ROCPROFSYS_PRINT(
|
||||
"Generating [%s] format(s) with collected data from trace cache. This may "
|
||||
"take a while..\n",
|
||||
ss.str().c_str());
|
||||
}
|
||||
};
|
||||
|
||||
struct processor_config_t
|
||||
{
|
||||
processor_config_t(pid_t pid, pid_t ppid,
|
||||
std::shared_ptr<metadata_registry> metadata_registry_ptr,
|
||||
std::shared_ptr<agent_manager> agent_manager_ptr)
|
||||
: _pid(pid)
|
||||
, _ppid(ppid)
|
||||
, _metadata_registry(std::move(metadata_registry_ptr))
|
||||
, _agent_manager(std::move(agent_manager_ptr))
|
||||
{}
|
||||
|
||||
pid_t _pid;
|
||||
pid_t _ppid;
|
||||
|
||||
std::shared_ptr<metadata_registry> _metadata_registry;
|
||||
std::shared_ptr<agent_manager> _agent_manager;
|
||||
};
|
||||
|
||||
struct processor_storage_t
|
||||
{
|
||||
std::shared_ptr<rocpd_processor_t> rocpd_processor{ nullptr };
|
||||
};
|
||||
|
||||
using directory_files_t = std::vector<std::string>;
|
||||
using mapped_cache_files_t = std::map<pid_t, cache_files_t>;
|
||||
} // namespace data
|
||||
|
||||
namespace filesystem_utils
|
||||
{
|
||||
|
||||
void
|
||||
remove_if_exists(const std::string& fname)
|
||||
{
|
||||
@@ -64,19 +140,29 @@ remove_if_exists(const std::string& fname)
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string>
|
||||
list_dir_files(const std::string& path)
|
||||
data::directory_files_t
|
||||
list_dir_files(const std::string& _path)
|
||||
{
|
||||
DIR* dir = opendir(path.c_str());
|
||||
if(dir == nullptr)
|
||||
if(_path.empty())
|
||||
{
|
||||
ROCPROFSYS_THROW("Error opening directory: %s", path.c_str());
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<std::string> result{};
|
||||
dirent* entry;
|
||||
auto dir_deleter = [](DIR* d) {
|
||||
if(d) closedir(d);
|
||||
};
|
||||
|
||||
while((entry = readdir(dir)) != nullptr)
|
||||
std::unique_ptr<DIR, decltype(dir_deleter)> dir(opendir(_path.c_str()), dir_deleter);
|
||||
|
||||
if(!dir)
|
||||
{
|
||||
ROCPROFSYS_THROW("Error opening directory: %s", _path.c_str());
|
||||
}
|
||||
|
||||
data::directory_files_t result{};
|
||||
dirent* entry;
|
||||
|
||||
while((entry = readdir(dir.get())) != nullptr)
|
||||
{
|
||||
if(std::string(entry->d_name) != "." && std::string(entry->d_name) != "..")
|
||||
{
|
||||
@@ -84,25 +170,26 @@ list_dir_files(const std::string& path)
|
||||
}
|
||||
}
|
||||
|
||||
closedir(dir);
|
||||
return result;
|
||||
}
|
||||
|
||||
struct cache_files
|
||||
data::mapped_cache_files_t
|
||||
get_cache_files(const pid_t& root_pid,
|
||||
const data::directory_files_t& _files_from_temp_directory)
|
||||
{
|
||||
std::string buff_storage;
|
||||
std::string metadata;
|
||||
};
|
||||
if(_files_from_temp_directory.empty())
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
std::map<pid_t, cache_files>
|
||||
get_cache_files()
|
||||
{
|
||||
const auto root_pid = get_root_process_id();
|
||||
const auto tmp_files = list_dir_files("/tmp/");
|
||||
|
||||
std::map<int, cache_files> cache_map{};
|
||||
data::mapped_cache_files_t cache_map{};
|
||||
|
||||
auto parse_and_fill_cache = [&](const std::string& filename) {
|
||||
if(filename.empty())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
const std::regex buff_regex(R"(buffered_storage_(\d+)_(\d+)\.bin)");
|
||||
const std::regex meta_regex(R"(metadata_(\d+)_(\d+)\.json)");
|
||||
std::smatch match;
|
||||
@@ -113,7 +200,7 @@ get_cache_files()
|
||||
int pid = std::stoi(match[2]);
|
||||
if(parent_pid == root_pid)
|
||||
{
|
||||
cache_map[pid].buff_storage = "/tmp/" + filename;
|
||||
cache_map[pid].buff_storage = trace_cache::tmp_directory + filename;
|
||||
}
|
||||
}
|
||||
else if(std::regex_match(filename, match, meta_regex))
|
||||
@@ -122,16 +209,119 @@ get_cache_files()
|
||||
int pid = std::stoi(match[2]);
|
||||
if(parent_pid == root_pid)
|
||||
{
|
||||
cache_map[pid].metadata = "/tmp/" + filename;
|
||||
cache_map[pid].metadata = trace_cache::tmp_directory + filename;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
std::for_each(tmp_files.begin(), tmp_files.end(), parse_and_fill_cache);
|
||||
std::for_each(_files_from_temp_directory.begin(), _files_from_temp_directory.end(),
|
||||
parse_and_fill_cache);
|
||||
return cache_map;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
void
|
||||
clear_cache_files(const data::mapped_cache_files_t& _cache_files)
|
||||
{
|
||||
ROCPROFSYS_PRINT("Removing cached temporary files...\n");
|
||||
for(const auto& [_, files] : _cache_files)
|
||||
{
|
||||
ROCPROFSYS_DEBUG("Removing cached temporary file: %s\n",
|
||||
files.buff_storage.c_str());
|
||||
filesystem_utils::remove_if_exists(files.buff_storage);
|
||||
|
||||
ROCPROFSYS_DEBUG("Removing cached temporary file: %s\n", files.metadata.c_str());
|
||||
filesystem_utils::remove_if_exists(files.metadata);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace filesystem_utils
|
||||
|
||||
namespace processing_utils
|
||||
{
|
||||
[[nodiscard]] data::processor_storage_t
|
||||
configure_processors(const std::shared_ptr<sample_processor_t>& _type_processing,
|
||||
const std::shared_ptr<data::processor_config_t>& _processor_config,
|
||||
const data::enabled_formats_t& _enabled_formats)
|
||||
{
|
||||
data::processor_storage_t processor_storage;
|
||||
if(_enabled_formats.rocpd)
|
||||
{
|
||||
processor_storage.rocpd_processor = std::make_shared<rocpd_processor_t>(
|
||||
_processor_config->_metadata_registry, _processor_config->_agent_manager,
|
||||
_processor_config->_pid, _processor_config->_ppid);
|
||||
_type_processing->add_handler(*processor_storage.rocpd_processor);
|
||||
}
|
||||
return processor_storage;
|
||||
}
|
||||
|
||||
void
|
||||
process_buffered_storage(
|
||||
const std::shared_ptr<data::processor_config_t>& _processor_config,
|
||||
const std::string& _storage_filename, const data::enabled_formats_t& _enabled_formats)
|
||||
{
|
||||
auto _processor_coordinator = std::make_shared<sample_processor_t>();
|
||||
auto processor_storage =
|
||||
configure_processors(_processor_coordinator, _processor_config, _enabled_formats);
|
||||
storage_parser_t _parser(_storage_filename);
|
||||
|
||||
_processor_coordinator->prepare_for_processing();
|
||||
_parser.load(_processor_coordinator);
|
||||
_processor_coordinator->finalize_processing();
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<data::processor_config_t>>
|
||||
create_processor_configs(const data::mapped_cache_files_t& _cache_files,
|
||||
const pid_t& _root_pid)
|
||||
{
|
||||
constexpr size_t ROOT_PROCESS_INCREMENT{ 1 };
|
||||
|
||||
std::vector<std::shared_ptr<data::processor_config_t>> processor_configs;
|
||||
processor_configs.reserve(_cache_files.size() + ROOT_PROCESS_INCREMENT);
|
||||
|
||||
for(const auto& [pid, files] : _cache_files)
|
||||
{
|
||||
if(files.empty())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<agent>> _agents;
|
||||
auto _metadata = std::make_shared<metadata_registry>();
|
||||
_metadata->load_from_file(files.metadata, _agents);
|
||||
|
||||
auto _agent_manager = std::make_shared<agent_manager>(_agents);
|
||||
|
||||
processor_configs.push_back(std::make_shared<data::processor_config_t>(
|
||||
pid, _root_pid, _metadata, _agent_manager));
|
||||
}
|
||||
return processor_configs;
|
||||
}
|
||||
|
||||
void
|
||||
multithreaded_processing(
|
||||
const std::vector<std::shared_ptr<data::processor_config_t>>& _processor_configs,
|
||||
const data::enabled_formats_t& _enabled_formats)
|
||||
{
|
||||
ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
|
||||
std::vector<std::thread> processing_threads;
|
||||
processing_threads.reserve(_processor_configs.size());
|
||||
for(const auto& processor_config : _processor_configs)
|
||||
{
|
||||
processing_threads.emplace_back(
|
||||
process_buffered_storage, processor_config,
|
||||
utility::get_buffered_storage_filename(processor_config->_ppid,
|
||||
processor_config->_pid),
|
||||
_enabled_formats);
|
||||
}
|
||||
|
||||
for(auto& thread : processing_threads)
|
||||
{
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace processing_utils
|
||||
|
||||
cache_manager&
|
||||
cache_manager::get_instance()
|
||||
@@ -143,90 +333,37 @@ cache_manager::get_instance()
|
||||
void
|
||||
cache_manager::post_process_bulk()
|
||||
{
|
||||
if(is_root_process())
|
||||
if(!is_root_process())
|
||||
{
|
||||
if(m_storage.is_running())
|
||||
{
|
||||
ROCPROFSYS_WARNING(2,
|
||||
"Postprocessing called without previously shutting down "
|
||||
"cache storage. Calling shutdown explicitly..\n");
|
||||
shutdown();
|
||||
}
|
||||
|
||||
auto _cache_files = get_cache_files();
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
ROCPROFSYS_PRINT(
|
||||
"Generating rocpd with collected data. This may take a while..\n");
|
||||
|
||||
std::vector<std::thread> rocpd_threads;
|
||||
ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
|
||||
rocpd_threads.emplace_back([this]() {
|
||||
auto pid = getpid();
|
||||
auto ppid = get_root_process_id();
|
||||
rocpd_post_processing _post_processing(
|
||||
m_metadata, get_agent_manager_instance(), pid, ppid);
|
||||
storage_parser _parser(
|
||||
get_buffered_storage_filename(get_root_process_id(), getpid()));
|
||||
_post_processing.register_parser_callback(_parser);
|
||||
_post_processing.post_process_metadata();
|
||||
_parser.consume_storage();
|
||||
});
|
||||
|
||||
for(const auto& [pid, files] : _cache_files)
|
||||
{
|
||||
if(!files.buff_storage.empty() && !files.metadata.empty())
|
||||
{
|
||||
rocpd_threads.emplace_back([pid = pid, files = files]() {
|
||||
ROCPROFSYS_DEBUG(
|
||||
"Creating database for [%d] from buffered storage "
|
||||
"file: %s and from metadata file: %s\n",
|
||||
pid, files.buff_storage.c_str(), files.metadata.c_str());
|
||||
|
||||
std::vector<std::shared_ptr<agent>> _agents;
|
||||
metadata_registry _metadata;
|
||||
|
||||
auto res = _metadata.load_from_file(files.metadata, _agents);
|
||||
if(!res)
|
||||
{
|
||||
ROCPROFSYS_WARNING(0,
|
||||
"Load from file for metadata failed: %s\n",
|
||||
files.metadata.c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
agent_manager _agent_manager{ _agents };
|
||||
auto ppid = get_root_process_id();
|
||||
rocpd_post_processing _post_processing(_metadata, _agent_manager,
|
||||
pid, ppid);
|
||||
storage_parser _parser(files.buff_storage);
|
||||
_post_processing.register_parser_callback(_parser);
|
||||
_post_processing.post_process_metadata();
|
||||
_parser.consume_storage();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
for(auto& thread : rocpd_threads)
|
||||
{
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
ROCPROFSYS_PRINT("Removing cached temporary files...\n");
|
||||
|
||||
for(const auto& [pid, files] : _cache_files)
|
||||
{
|
||||
ROCPROFSYS_PRINT("Removing cached temporary file: %s\n",
|
||||
files.buff_storage.c_str());
|
||||
ROCPROFSYS_PRINT("Removing cached temporary file: %s\n",
|
||||
files.metadata.c_str());
|
||||
remove_if_exists(files.buff_storage.c_str());
|
||||
remove_if_exists(files.metadata.c_str());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if(m_storage.is_running())
|
||||
{
|
||||
ROCPROFSYS_WARNING(2, "Postprocessing called without previously shutting down "
|
||||
"cache storage. Calling shutdown explicitly..\n");
|
||||
shutdown();
|
||||
}
|
||||
|
||||
const auto root_pid = get_root_process_id();
|
||||
const auto temp_directory_content =
|
||||
filesystem_utils::list_dir_files(trace_cache::tmp_directory);
|
||||
|
||||
const auto cache_files =
|
||||
filesystem_utils::get_cache_files(root_pid, temp_directory_content);
|
||||
const data::enabled_formats_t enabled_formats;
|
||||
enabled_formats.print();
|
||||
|
||||
auto processor_configs =
|
||||
processing_utils::create_processor_configs(cache_files, root_pid);
|
||||
|
||||
processor_configs.push_back(std::make_shared<data::processor_config_t>(
|
||||
getpid(), root_pid, m_metadata,
|
||||
std::make_shared<agent_manager>(get_agent_manager_instance().get_agents())));
|
||||
|
||||
processing_utils::multithreaded_processing(processor_configs, enabled_formats);
|
||||
|
||||
filesystem_utils::clear_cache_files(cache_files);
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@@ -22,30 +22,43 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "buffer_storage.hpp"
|
||||
#include "metadata_registry.hpp"
|
||||
#include "storage_parser.hpp"
|
||||
#include "core/trace_cache/buffer_storage.hpp"
|
||||
#include "core/trace_cache/metadata_registry.hpp"
|
||||
#include "core/trace_cache/sample_type.hpp"
|
||||
#include "core/trace_cache/storage_parser.hpp"
|
||||
#include <memory>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
using storage_parser_t =
|
||||
storage_parser<type_identifier_t, kernel_dispatch_sample, memory_copy_sample,
|
||||
memory_allocate_sample, region_sample, in_time_sample,
|
||||
pmc_event_with_sample, amd_smi_sample, cpu_freq_sample,
|
||||
backtrace_region_sample>;
|
||||
|
||||
using buffer_storage_t = buffer_storage<flush_worker_factory_t, type_identifier_t>;
|
||||
|
||||
class cache_manager
|
||||
{
|
||||
public:
|
||||
static cache_manager& get_instance();
|
||||
buffer_storage& get_buffer_storage() { return m_storage; }
|
||||
metadata_registry& get_metadata_registry() { return m_metadata; }
|
||||
buffer_storage_t& get_buffer_storage() { return m_storage; }
|
||||
metadata_registry& get_metadata_registry() { return *m_metadata; }
|
||||
void shutdown();
|
||||
void post_process_bulk();
|
||||
|
||||
private:
|
||||
void post_process_metadata();
|
||||
cache_manager() = default;
|
||||
|
||||
buffer_storage m_storage;
|
||||
metadata_registry m_metadata;
|
||||
buffer_storage_t m_storage{ utility::get_buffered_storage_filename(
|
||||
get_root_process_id(), getpid()) };
|
||||
|
||||
std::shared_ptr<metadata_registry> m_metadata{
|
||||
std::make_shared<metadata_registry>()
|
||||
};
|
||||
};
|
||||
|
||||
inline metadata_registry&
|
||||
@@ -54,7 +67,7 @@ get_metadata_registry()
|
||||
return cache_manager::get_instance().get_metadata_registry();
|
||||
}
|
||||
|
||||
inline buffer_storage&
|
||||
inline buffer_storage_t&
|
||||
get_buffer_storage()
|
||||
{
|
||||
return cache_manager::get_instance().get_buffer_storage();
|
||||
|
||||
@@ -0,0 +1,173 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
#include <string_view>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
namespace type_traits
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
struct always_false : std::false_type
|
||||
{};
|
||||
|
||||
} // namespace type_traits
|
||||
|
||||
template <typename T>
|
||||
void
|
||||
serialize(uint8_t*, const T&)
|
||||
{
|
||||
static_assert(type_traits::always_false<T>::value, "serialize<T> not specialized");
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T
|
||||
deserialize(uint8_t*&)
|
||||
{
|
||||
static_assert(type_traits::always_false<T>::value, "deserialize<T> not specialized");
|
||||
return T{};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
size_t
|
||||
get_size(const T&)
|
||||
{
|
||||
static_assert(type_traits::always_false<T>::value, "get_size(T) not specialized");
|
||||
return 0;
|
||||
}
|
||||
|
||||
namespace type_traits
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
struct tuple_to_variant;
|
||||
|
||||
template <typename... Types>
|
||||
struct tuple_to_variant<std::tuple<Types...>>
|
||||
{
|
||||
using type = std::variant<Types...>;
|
||||
};
|
||||
|
||||
template <class...>
|
||||
using void_t = void;
|
||||
|
||||
template <typename... Types>
|
||||
struct typelist
|
||||
{
|
||||
template <typename T>
|
||||
constexpr static bool is_supported = (std::is_same_v<std::decay_t<T>, Types> || ...);
|
||||
};
|
||||
|
||||
using supported_types = typelist<std::string_view, uint64_t, int32_t, uint32_t,
|
||||
std::vector<uint8_t>, uint8_t, int64_t, double>;
|
||||
|
||||
template <typename T>
|
||||
static constexpr bool is_string_view_v =
|
||||
std::is_same_v<std::decay_t<T>, std::string_view>;
|
||||
|
||||
template <typename T>
|
||||
struct is_enum_class
|
||||
: std::bool_constant<std::is_enum_v<T> &&
|
||||
!std::is_convertible_v<T, std::underlying_type_t<T>>>
|
||||
{};
|
||||
|
||||
template <typename T>
|
||||
inline constexpr bool is_enum_class_v = is_enum_class<T>::value;
|
||||
|
||||
template <typename T, typename TypeIdentifierEnum, typename = void>
|
||||
struct has_type_identifier : std::false_type
|
||||
{};
|
||||
|
||||
template <class T, typename TypeIdentifierEnum>
|
||||
struct has_type_identifier<T, TypeIdentifierEnum, void_t<decltype(T::type_identifier)>>
|
||||
: std::bool_constant<
|
||||
is_enum_class_v<TypeIdentifierEnum> &&
|
||||
std::is_convertible_v<decltype(T::type_identifier), TypeIdentifierEnum>>
|
||||
{};
|
||||
|
||||
template <typename T, typename = void>
|
||||
struct has_serialize : std::false_type
|
||||
{};
|
||||
|
||||
template <typename T>
|
||||
struct has_serialize<T, std::void_t<decltype(serialize(std::declval<uint8_t*>(),
|
||||
std::declval<const T&>()))>>
|
||||
: std::true_type
|
||||
{};
|
||||
|
||||
template <typename T, typename = void>
|
||||
struct has_deserialize : std::false_type
|
||||
{};
|
||||
|
||||
template <typename T>
|
||||
struct has_deserialize<
|
||||
T, void_t<std::is_same<decltype(deserialize<T>(std::declval<uint8_t*&>())), T>>>
|
||||
: std::true_type
|
||||
{};
|
||||
|
||||
template <typename T, typename = void>
|
||||
struct has_get_size : std::false_type
|
||||
{};
|
||||
|
||||
template <typename T>
|
||||
struct has_get_size<T, void_t<decltype(get_size(std::declval<const T&>()))>>
|
||||
: std::true_type
|
||||
{};
|
||||
|
||||
template <typename T, typename TypeIdentifierEnum>
|
||||
__attribute__((always_inline)) inline constexpr void
|
||||
check_type()
|
||||
{
|
||||
static_assert(has_serialize<T>::value, "Type doesn't have `serialize` function.");
|
||||
static_assert(has_deserialize<T>::value, "Type doesn't have `deserialize` function.");
|
||||
static_assert(has_get_size<T>::value, "Type doesn't have `get_size` function.");
|
||||
static_assert(has_type_identifier<T, TypeIdentifierEnum>::value,
|
||||
"Type doesn't have `type_identifier` member with correct type.");
|
||||
}
|
||||
|
||||
template <typename T, typename TypeIdentifierEnum, typename CacheableType,
|
||||
typename = void>
|
||||
struct has_execute_processing : std::false_type
|
||||
{};
|
||||
|
||||
template <typename T, typename TypeIdentifierEnum, typename CacheableType>
|
||||
struct has_execute_processing<
|
||||
T, TypeIdentifierEnum, CacheableType,
|
||||
void_t<decltype(std::declval<T>().execute_sample_processing(
|
||||
std::declval<TypeIdentifierEnum>(), std::declval<const CacheableType&>()))>>
|
||||
: std::bool_constant<std::is_void_v<decltype(std::declval<T>().execute_sample_processing(
|
||||
std::declval<TypeIdentifierEnum>(), std::declval<const CacheableType&>()))>>
|
||||
{};
|
||||
|
||||
} // namespace type_traits
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,176 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include "core/trace_cache/cache_type_traits.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
struct cacheable_t
|
||||
{
|
||||
cacheable_t() = default;
|
||||
};
|
||||
|
||||
constexpr size_t MByte = 1024 * 1024;
|
||||
constexpr size_t buffer_size = 100 * MByte;
|
||||
constexpr size_t flush_threshold = 80 * MByte;
|
||||
constexpr auto CACHE_FILE_FLUSH_TIMEOUT = 10ms;
|
||||
|
||||
constexpr auto ABSOLUTE = "ABS";
|
||||
constexpr auto PERCENTAGE = "%";
|
||||
|
||||
template <typename TypeIdentifierEnum>
|
||||
constexpr size_t header_size = sizeof(TypeIdentifierEnum) + sizeof(size_t);
|
||||
using buffer_array_t = std::array<uint8_t, buffer_size>;
|
||||
|
||||
const auto tmp_directory = std::string{ "/tmp/" };
|
||||
|
||||
namespace utility
|
||||
{
|
||||
|
||||
const auto get_buffered_storage_filename = [](const int& ppid, const int& pid) {
|
||||
return std::string{ tmp_directory + "buffered_storage_" + std::to_string(ppid) + "_" +
|
||||
std::to_string(pid) + ".bin" };
|
||||
};
|
||||
|
||||
const auto get_metadata_filepath = [](const int& ppid, const int& pid) {
|
||||
return std::string{ tmp_directory + "metadata_" + std::to_string(ppid) + "_" +
|
||||
std::to_string(pid) + ".json" };
|
||||
};
|
||||
|
||||
template <typename Type>
|
||||
__attribute__((always_inline)) inline constexpr size_t
|
||||
get_size(Type&& val)
|
||||
{
|
||||
using DecayedType = std::decay_t<Type>;
|
||||
static_assert(type_traits::supported_types::is_supported<DecayedType>,
|
||||
"Unsupported type in get_size");
|
||||
|
||||
if constexpr(type_traits::is_string_view_v<DecayedType> ||
|
||||
std::is_same_v<DecayedType, std::vector<uint8_t>>)
|
||||
{
|
||||
return val.size() + sizeof(size_t);
|
||||
}
|
||||
else
|
||||
{
|
||||
return sizeof(DecayedType);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Type, typename... Types>
|
||||
__attribute__((always_inline)) inline constexpr size_t
|
||||
get_size(Type&& val, Types&&... vals)
|
||||
{
|
||||
return get_size(std::forward<Type>(val)) + get_size(std::forward<Types>(vals)...);
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
__attribute__((always_inline)) inline void
|
||||
store_value(const Type& value, uint8_t* buffer, size_t& position)
|
||||
{
|
||||
using DecayedType = std::decay_t<Type>;
|
||||
static_assert(type_traits::supported_types::is_supported<DecayedType>,
|
||||
"Unsupported type in store_value");
|
||||
|
||||
auto* dest = buffer + position;
|
||||
|
||||
if constexpr(type_traits::is_string_view_v<DecayedType> ||
|
||||
std::is_same_v<DecayedType, std::vector<uint8_t>>)
|
||||
{
|
||||
const size_t elem_count = value.size();
|
||||
*reinterpret_cast<size_t*>(dest) = elem_count;
|
||||
std::memcpy(dest + sizeof(size_t), value.data(), elem_count);
|
||||
position += elem_count + sizeof(size_t);
|
||||
}
|
||||
else
|
||||
{
|
||||
*reinterpret_cast<DecayedType*>(dest) = value;
|
||||
position += sizeof(DecayedType);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... Types>
|
||||
__attribute__((always_inline)) inline void
|
||||
store_value(uint8_t* buffer, const Types&... values)
|
||||
{
|
||||
size_t position = 0;
|
||||
(store_value(values, buffer, position), ...);
|
||||
}
|
||||
|
||||
template <typename Type>
|
||||
__attribute__((always_inline)) inline static void
|
||||
parse_value(uint8_t*& data_pos, Type& arg)
|
||||
{
|
||||
using DecayedType = std::decay_t<Type>;
|
||||
static_assert(type_traits::supported_types::is_supported<DecayedType>,
|
||||
"Unsupported type in parse_value");
|
||||
|
||||
if constexpr(type_traits::is_string_view_v<DecayedType>)
|
||||
{
|
||||
const size_t string_size = *reinterpret_cast<const size_t*>(data_pos);
|
||||
data_pos += sizeof(size_t);
|
||||
arg = std::string_view{ reinterpret_cast<const char*>(data_pos), string_size };
|
||||
data_pos += string_size;
|
||||
}
|
||||
else if constexpr(std::is_same_v<DecayedType, std::vector<uint8_t>>)
|
||||
{
|
||||
const size_t vector_size = *reinterpret_cast<const size_t*>(data_pos);
|
||||
data_pos += sizeof(size_t);
|
||||
arg.reserve(vector_size);
|
||||
std::copy_n(data_pos, vector_size, std::back_inserter(arg));
|
||||
data_pos += vector_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
arg = *reinterpret_cast<const DecayedType*>(data_pos);
|
||||
data_pos += sizeof(DecayedType);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Type, typename... Types>
|
||||
__attribute__((always_inline)) inline static void
|
||||
parse_value(uint8_t*& data_pos, Type& arg, Types&... args)
|
||||
{
|
||||
parse_value(data_pos, arg);
|
||||
parse_value(data_pos, args...);
|
||||
}
|
||||
|
||||
} // namespace utility
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
@@ -38,6 +38,23 @@ namespace trace_cache
|
||||
namespace
|
||||
{
|
||||
|
||||
class thread_local_string_pool
|
||||
{
|
||||
public:
|
||||
const char* store(std::string_view str)
|
||||
{
|
||||
auto [it, inserted] = m_strings.emplace(str);
|
||||
return it->c_str();
|
||||
}
|
||||
|
||||
void clear() { m_strings.clear(); }
|
||||
|
||||
private:
|
||||
std::set<std::string> m_strings;
|
||||
};
|
||||
|
||||
thread_local thread_local_string_pool g_string_pool;
|
||||
|
||||
template <typename ReturnType, typename DataType, typename Filter>
|
||||
std::optional<ReturnType>
|
||||
get_type_info(const DataType& data, const Filter& filter)
|
||||
@@ -215,12 +232,11 @@ from_json_code_object(const nlohmann::json& _json)
|
||||
rocprofiler_callback_tracing_code_object_load_data_t co = {};
|
||||
co.code_object_id = _json["code_object_id"].get<long long>();
|
||||
auto uri_str = _json["uri"].get<std::string>();
|
||||
co.uri = new char[uri_str.size() + 1];
|
||||
strncpy(const_cast<char*>(co.uri), uri_str.c_str(), uri_str.size() + 1);
|
||||
co.load_base = _json["load_base"].get<long long>();
|
||||
co.load_size = _json["load_size"].get<long long>();
|
||||
co.load_delta = _json["load_delta"].get<long long>();
|
||||
co.storage_type = static_cast<rocprofiler_code_object_storage_type_t>(
|
||||
co.uri = g_string_pool.store(uri_str);
|
||||
co.load_base = _json["load_base"].get<long long>();
|
||||
co.load_size = _json["load_size"].get<long long>();
|
||||
co.load_delta = _json["load_delta"].get<long long>();
|
||||
co.storage_type = static_cast<rocprofiler_code_object_storage_type_t>(
|
||||
_json["storage_type"].get<int>());
|
||||
auto handle = _json["agent_id_handle"].get<long long>();
|
||||
# if(ROCPROFILER_VERSION >= 600)
|
||||
@@ -255,12 +271,10 @@ rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t
|
||||
from_json_kernel_symbol(const nlohmann::json& _json)
|
||||
{
|
||||
rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t ks = {};
|
||||
ks.kernel_id = _json["kernel_id"].get<long long>();
|
||||
ks.code_object_id = _json["code_object_id"].get<long long>();
|
||||
auto kernel_name_str = _json["kernel_name"].get<std::string>();
|
||||
ks.kernel_name = new char[kernel_name_str.size() + 1];
|
||||
strncpy(const_cast<char*>(ks.kernel_name), kernel_name_str.c_str(),
|
||||
sizeof(ks.kernel_name) + 1);
|
||||
ks.kernel_id = _json["kernel_id"].get<long long>();
|
||||
ks.code_object_id = _json["code_object_id"].get<long long>();
|
||||
auto kernel_name_str = _json["kernel_name"].get<std::string>();
|
||||
ks.kernel_name = g_string_pool.store(kernel_name_str);
|
||||
ks.kernel_object = _json["kernel_object"].get<long long>();
|
||||
ks.kernarg_segment_size = _json["kernarg_segment_size"].get<int>();
|
||||
ks.kernarg_segment_alignment = _json["kernarg_segment_alignment"].get<int>();
|
||||
@@ -843,6 +857,7 @@ metadata_registry::save_to_file(const std::string& filep
|
||||
std::ofstream file(filepath);
|
||||
if(!file.is_open())
|
||||
{
|
||||
ROCPROFSYS_WARNING(1, "Error opening file for writing: %s", filepath.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -851,6 +866,7 @@ metadata_registry::save_to_file(const std::string& filep
|
||||
return true;
|
||||
} catch(const std::exception& e)
|
||||
{
|
||||
ROCPROFSYS_WARNING(1, "Error saving metadata to file: %s", e.what());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -864,6 +880,7 @@ metadata_registry::load_from_file(const std::string& filepath,
|
||||
std::ifstream file(filepath);
|
||||
if(!file.is_open())
|
||||
{
|
||||
ROCPROFSYS_WARNING(1, "Error opening file for reading: %s", filepath.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -875,6 +892,7 @@ metadata_registry::load_from_file(const std::string& filepath,
|
||||
return true;
|
||||
} catch(const std::exception& e)
|
||||
{
|
||||
ROCPROFSYS_WARNING(1, "Error loading metadata from file: %s", e.what());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -178,9 +178,14 @@ struct kernel_symbol_less
|
||||
|
||||
} // namespace info
|
||||
|
||||
class cache_manager;
|
||||
struct metadata_registry
|
||||
{
|
||||
metadata_registry();
|
||||
metadata_registry(const metadata_registry&) = delete;
|
||||
metadata_registry& operator=(const metadata_registry&) = delete;
|
||||
metadata_registry(metadata_registry&&) = delete;
|
||||
metadata_registry& operator=(metadata_registry&&) = delete;
|
||||
|
||||
void set_process(const info::process& process);
|
||||
void add_pmc_info(const info::pmc& pmc_info);
|
||||
void add_thread_info(const info::thread& thread_info);
|
||||
@@ -224,26 +229,24 @@ struct metadata_registry
|
||||
#endif
|
||||
|
||||
private:
|
||||
friend class cache_manager;
|
||||
metadata_registry();
|
||||
common::synchronized<info::process> m_process;
|
||||
common::synchronized<info::process> m_process{};
|
||||
common::synchronized<
|
||||
std::unordered_set<info::pmc, info::pmc_info_hash, info::pmc_info_equal>>
|
||||
m_pmc_infos;
|
||||
common::synchronized<std::set<info::thread>> m_threads;
|
||||
common::synchronized<std::set<info::track>> m_tracks;
|
||||
m_pmc_infos{};
|
||||
common::synchronized<std::set<info::thread>> m_threads{};
|
||||
common::synchronized<std::set<info::track>> m_tracks{};
|
||||
|
||||
common::synchronized<std::set<uint64_t>> m_streams;
|
||||
common::synchronized<std::set<uint64_t>> m_queues;
|
||||
common::synchronized<std::unordered_set<std::string_view>> m_strings;
|
||||
common::synchronized<std::set<uint64_t>> m_streams{};
|
||||
common::synchronized<std::set<uint64_t>> m_queues{};
|
||||
common::synchronized<std::unordered_set<std::string_view>> m_strings{};
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
common::synchronized<std::set<rocprofiler_callback_tracing_code_object_load_data_t,
|
||||
info::code_object_less>>
|
||||
m_code_objects;
|
||||
m_code_objects{};
|
||||
common::synchronized<
|
||||
std::set<rocprofiler_callback_tracing_code_object_kernel_symbol_register_data_t,
|
||||
info::kernel_symbol_less>>
|
||||
m_kernel_symbols;
|
||||
m_kernel_symbols{};
|
||||
rocprofiler::sdk::buffer_name_info_t<const char*> m_buffered_tracing_info{
|
||||
rocprofiler::sdk::get_buffer_tracing_names<const char*>()
|
||||
};
|
||||
|
||||
@@ -1,904 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "trace_cache/rocpd_post_processing.hpp"
|
||||
#include "agent_manager.hpp"
|
||||
#include "config.hpp"
|
||||
#include "debug.hpp"
|
||||
#include "gpu_metrics.hpp"
|
||||
#include "library/thread_info.hpp"
|
||||
#include "node_info.hpp"
|
||||
#include "rocpd/data_processor.hpp"
|
||||
#include "rocpd/data_storage/database.hpp"
|
||||
#include "trace_cache/metadata_registry.hpp"
|
||||
#include "trace_cache/sample_type.hpp"
|
||||
#include "trace_cache/storage_parser.hpp"
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <timemory/utility/demangle.hpp>
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
# include "library/rocprofiler-sdk/fwd.hpp"
|
||||
# include <rocprofiler-sdk/context.h>
|
||||
# include <rocprofiler-sdk/version.h>
|
||||
#endif
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
namespace
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto
|
||||
get_handle_from_code_object(
|
||||
const rocprofiler_callback_tracing_code_object_load_data_t& code_object)
|
||||
{
|
||||
# if(ROCPROFILER_VERSION >= 600)
|
||||
return code_object.agent_id.handle;
|
||||
# else
|
||||
return code_object.rocp_agent.handle;
|
||||
# endif
|
||||
}
|
||||
#endif
|
||||
} // namespace
|
||||
|
||||
std::shared_ptr<rocpd::data_processor>
|
||||
rocpd_post_processing::get_data_processor() const
|
||||
{
|
||||
return m_data_processor;
|
||||
}
|
||||
|
||||
postprocessing_callback
|
||||
rocpd_post_processing::get_kernel_dispatch_callback() const
|
||||
{
|
||||
return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) {
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto _kds = static_cast<const struct kernel_dispatch_sample&>(parsed);
|
||||
|
||||
auto data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto process = m_metadata.get_process_info();
|
||||
auto agent_primary_key =
|
||||
m_agent_manager.get_agent_by_handle(_kds.agent_id_handle).base_id;
|
||||
|
||||
auto thread_primary_key =
|
||||
data_processor->map_thread_id_to_primary_key(_kds.thread_id);
|
||||
|
||||
auto category_id = data_processor->insert_string(
|
||||
trait::name<category::rocm_kernel_dispatch>::value);
|
||||
|
||||
auto kernel_symbol = m_metadata.get_kernel_symbol(_kds.kernel_id);
|
||||
|
||||
if(!kernel_symbol.has_value())
|
||||
{
|
||||
throw std::runtime_error("Kernel symbol is missing for kernel dispatch");
|
||||
return;
|
||||
}
|
||||
|
||||
auto region_name_primary_key = data_processor->insert_string(
|
||||
tim::demangle(kernel_symbol->kernel_name).c_str());
|
||||
|
||||
auto stack_id = _kds.correlation_id_internal;
|
||||
auto parent_stack_id = _kds.correlation_id_ancestor;
|
||||
auto correlation_id = 0;
|
||||
|
||||
auto event_id = data_processor->insert_event(category_id, stack_id,
|
||||
parent_stack_id, correlation_id);
|
||||
|
||||
data_processor->insert_kernel_dispatch(
|
||||
n_info.id, process.pid, thread_primary_key, agent_primary_key, _kds.kernel_id,
|
||||
_kds.dispatch_id, _kds.queue_id_handle, _kds.stream_handle,
|
||||
_kds.start_timestamp, _kds.end_timestamp, _kds.private_segment_size,
|
||||
_kds.group_segment_size, _kds.workgroup_size_x, _kds.workgroup_size_y,
|
||||
_kds.workgroup_size_z, _kds.grid_size_x, _kds.grid_size_y, _kds.grid_size_z,
|
||||
region_name_primary_key, event_id);
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
postprocessing_callback
|
||||
rocpd_post_processing::get_memory_copy_callback() const
|
||||
{
|
||||
return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) {
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto _mcs = static_cast<const struct memory_copy_sample&>(parsed);
|
||||
|
||||
auto data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto process = m_metadata.get_process_info();
|
||||
|
||||
auto _name = std::string{ m_metadata.get_buffer_name_info().at(
|
||||
static_cast<rocprofiler_buffer_tracing_kind_t>(_mcs.kind),
|
||||
static_cast<rocprofiler_tracing_operation_t>(_mcs.operation)) };
|
||||
auto name_primary_key = data_processor->insert_string(_name.c_str());
|
||||
|
||||
auto category_primary_key =
|
||||
data_processor->insert_string(trait::name<category::rocm_memory_copy>::value);
|
||||
|
||||
auto thread_primary_key =
|
||||
data_processor->map_thread_id_to_primary_key(_mcs.thread_id);
|
||||
|
||||
auto dst_agent_primary_key =
|
||||
m_agent_manager.get_agent_by_handle(_mcs.dst_agent_id_handle).base_id;
|
||||
auto src_agent_primary_key =
|
||||
m_agent_manager.get_agent_by_handle(_mcs.src_agent_id_handle).base_id;
|
||||
|
||||
auto stack_id = _mcs.correlation_id_internal;
|
||||
auto parent_stack_id = _mcs.correlation_id_ancestor;
|
||||
auto correlation_id = 0;
|
||||
auto queue_id = 0;
|
||||
|
||||
auto event_primary_key = data_processor->insert_event(
|
||||
category_primary_key, stack_id, parent_stack_id, correlation_id);
|
||||
|
||||
data_processor->insert_memory_copy(
|
||||
n_info.id, process.pid, thread_primary_key, _mcs.start_timestamp,
|
||||
_mcs.end_timestamp, name_primary_key, dst_agent_primary_key,
|
||||
_mcs.dst_address_value, src_agent_primary_key, _mcs.src_address_value,
|
||||
_mcs.bytes, queue_id, _mcs.stream_handle, name_primary_key,
|
||||
event_primary_key);
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
#if(ROCPROFSYS_USE_ROCM > 0 && ROCPROFILER_VERSION >= 600)
|
||||
postprocessing_callback
|
||||
rocpd_post_processing::get_memory_allocate_callback() const
|
||||
{
|
||||
# if ROCPROFSYS_USE_ROCM > 0
|
||||
auto memtype_to_db =
|
||||
[](std::string_view memory_type) -> std::pair<std::string, std::string> {
|
||||
constexpr auto MEMORY_PREFIX = std::string_view{ "MEMORY_ALLOCATION_" };
|
||||
constexpr auto SCRATCH_PREFIX = std::string_view{ "SCRATCH_MEMORY_" };
|
||||
constexpr auto VMEM_PREFIX = std::string_view{ "VMEM_" };
|
||||
constexpr auto ASYNC_PREFIX = std::string_view{ "ASYNC_" };
|
||||
|
||||
std::string _type;
|
||||
std::string _level;
|
||||
if(memory_type.find(MEMORY_PREFIX) == 0)
|
||||
{
|
||||
_type = memory_type.substr(MEMORY_PREFIX.length());
|
||||
if(_type.find(VMEM_PREFIX) == 0)
|
||||
{
|
||||
_type = _type.substr(VMEM_PREFIX.length());
|
||||
_level = "VIRTUAL";
|
||||
}
|
||||
else
|
||||
{
|
||||
_level = "REAL";
|
||||
}
|
||||
}
|
||||
else if(memory_type.find(SCRATCH_PREFIX) == 0)
|
||||
{
|
||||
_type = memory_type.substr(SCRATCH_PREFIX.length());
|
||||
_level = "SCRATCH";
|
||||
if(memory_type.find(ASYNC_PREFIX) == 0)
|
||||
{
|
||||
_type = memory_type.substr(ASYNC_PREFIX.length()); // RECLAIM
|
||||
}
|
||||
}
|
||||
|
||||
if(_type == "ALLOCATE")
|
||||
{
|
||||
_type = "ALLOC";
|
||||
}
|
||||
|
||||
return std::make_pair(_type, _level);
|
||||
};
|
||||
# endif
|
||||
|
||||
return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) {
|
||||
# if ROCPROFSYS_USE_ROCM > 0
|
||||
auto _mas = static_cast<const struct memory_allocate_sample&>(parsed);
|
||||
auto data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto process = m_metadata.get_process_info();
|
||||
auto thread_primary_key =
|
||||
data_processor->map_thread_id_to_primary_key(_mas.thread_id);
|
||||
auto agent_primary_key = std::optional<uint64_t>{};
|
||||
|
||||
const auto invalid_context = ROCPROFILER_CONTEXT_NONE;
|
||||
if(_mas.agent_id_handle != invalid_context.handle)
|
||||
{
|
||||
{
|
||||
agent_primary_key =
|
||||
m_agent_manager.get_agent_by_handle(_mas.agent_id_handle).base_id;
|
||||
}
|
||||
const auto* _name = m_metadata.get_buffer_name_info().at(
|
||||
static_cast<rocprofiler_buffer_tracing_kind_t>(_mas.kind),
|
||||
static_cast<rocprofiler_tracing_operation_t>(_mas.operation));
|
||||
|
||||
auto [type, level] = memtype_to_db(_name);
|
||||
|
||||
auto stack_id = _mas.correlation_id_internal;
|
||||
auto parent_stack_id = _mas.correlation_id_ancestor;
|
||||
auto correlation_id = 0;
|
||||
auto queue_id = 0;
|
||||
|
||||
auto category_primary_key = data_processor->insert_string(
|
||||
trait::name<category::rocm_memory_allocate>::value);
|
||||
|
||||
auto event_primary_key = data_processor->insert_event(
|
||||
category_primary_key, stack_id, parent_stack_id, correlation_id);
|
||||
|
||||
data_processor->insert_memory_alloc(
|
||||
n_info.id, process.pid, thread_primary_key, agent_primary_key,
|
||||
type.c_str(), level.c_str(), _mas.start_timestamp, _mas.end_timestamp,
|
||||
_mas.address_value, _mas.allocation_size, queue_id, _mas.stream_handle,
|
||||
event_primary_key);
|
||||
# endif
|
||||
};
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
postprocessing_callback
|
||||
rocpd_post_processing::get_region_callback() const
|
||||
{
|
||||
[[maybe_unused]] auto parse_args = []([[maybe_unused]] const std::string& arg_str) {
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
rocprofiler_sdk::function_args_t args;
|
||||
const std::string delimiter = ";;";
|
||||
|
||||
auto split = [](const std::string& str, const std::string& _delimiter) {
|
||||
std::vector<std::string> tokens;
|
||||
size_t start = 0;
|
||||
size_t end = str.find(_delimiter);
|
||||
|
||||
while(end != std::string::npos)
|
||||
{
|
||||
tokens.push_back(str.substr(start, end - start));
|
||||
start = end + _delimiter.length();
|
||||
end = str.find(_delimiter, start);
|
||||
}
|
||||
|
||||
return tokens;
|
||||
};
|
||||
|
||||
if(arg_str.empty())
|
||||
{
|
||||
return args;
|
||||
}
|
||||
|
||||
auto tokens = split(arg_str, delimiter);
|
||||
|
||||
// Ensure the number of tokens is a multiple of 4
|
||||
if(tokens.size() % 4 != 0)
|
||||
{
|
||||
throw std::invalid_argument("Malformed argument string.");
|
||||
}
|
||||
|
||||
for(auto it = tokens.begin(); it != tokens.end(); it += 4)
|
||||
{
|
||||
rocprofiler_sdk::argument_info arg = { static_cast<uint32_t>(std::stoi(*it)),
|
||||
*(it + 1), *(it + 2), *(it + 3) };
|
||||
args.push_back(arg);
|
||||
}
|
||||
|
||||
return args;
|
||||
#endif
|
||||
};
|
||||
|
||||
return [&]([[maybe_unused]] const storage_parsed_type_base& parsed) {
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto _rs = static_cast<const struct region_sample&>(parsed);
|
||||
auto data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto process = m_metadata.get_process_info();
|
||||
auto thread_primary_key =
|
||||
data_processor->map_thread_id_to_primary_key(_rs.thread_id);
|
||||
|
||||
auto name_primary_key = data_processor->insert_string(_rs.name.c_str());
|
||||
auto category_primary_key = data_processor->insert_string(_rs.category.c_str());
|
||||
|
||||
size_t stack_id = _rs.correlation_id_internal;
|
||||
size_t parent_stack_id = _rs.correlation_id_ancestor;
|
||||
size_t correlation_id = 0;
|
||||
|
||||
auto event_primary_key =
|
||||
data_processor->insert_event(category_primary_key, stack_id, parent_stack_id,
|
||||
correlation_id, _rs.call_stack.c_str());
|
||||
|
||||
auto args = parse_args(_rs.args_str);
|
||||
for(const auto& arg : args)
|
||||
{
|
||||
data_processor->insert_args(event_primary_key, arg.arg_number,
|
||||
arg.arg_type.c_str(), arg.arg_name.c_str(),
|
||||
arg.arg_value.c_str());
|
||||
}
|
||||
|
||||
data_processor->insert_region(n_info.id, process.pid, thread_primary_key,
|
||||
_rs.start_timestamp, _rs.end_timestamp,
|
||||
name_primary_key, event_primary_key);
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
postprocessing_callback
|
||||
rocpd_post_processing::get_backtrace_sample_callback() const
|
||||
{
|
||||
return [&](const storage_parsed_type_base& parsed) {
|
||||
auto _bts = static_cast<const struct backtrace_region_sample&>(parsed);
|
||||
auto data_processor = get_data_processor();
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto process = m_metadata.get_process_info();
|
||||
auto thread_primary_key =
|
||||
data_processor->map_thread_id_to_primary_key(_bts.thread_id);
|
||||
auto name_primary_key = data_processor->insert_string(_bts.name.c_str());
|
||||
auto category_primary_key = data_processor->insert_string(_bts.category.c_str());
|
||||
|
||||
auto event_primary_key = data_processor->insert_event(
|
||||
category_primary_key, 0, 0, 0, _bts.call_stack.c_str(),
|
||||
_bts.line_info.c_str(), _bts.extdata.c_str());
|
||||
|
||||
data_processor->insert_region(n_info.id, process.pid, thread_primary_key,
|
||||
_bts.start_timestamp, _bts.end_timestamp,
|
||||
name_primary_key, event_primary_key);
|
||||
data_processor->insert_sample(_bts.track_name.c_str(), _bts.start_timestamp,
|
||||
event_primary_key);
|
||||
};
|
||||
}
|
||||
|
||||
postprocessing_callback
|
||||
rocpd_post_processing::get_in_time_sample_callback() const
|
||||
{
|
||||
return [&](const storage_parsed_type_base& parsed) {
|
||||
auto _its = static_cast<const struct in_time_sample&>(parsed);
|
||||
auto data_processor = get_data_processor();
|
||||
auto track_primary_key = data_processor->insert_string(_its.track_name.c_str());
|
||||
|
||||
auto event_id = data_processor->insert_event(
|
||||
track_primary_key, _its.stack_id, _its.parent_stack_id, _its.correlation_id,
|
||||
_its.call_stack.c_str(), _its.line_info.c_str(), _its.event_metadata.c_str());
|
||||
data_processor->insert_sample(_its.track_name.c_str(), _its.timestamp_ns,
|
||||
event_id, "{}");
|
||||
};
|
||||
}
|
||||
postprocessing_callback
|
||||
rocpd_post_processing::get_pmc_event_with_sample_callback() const
|
||||
{
|
||||
return [&](const storage_parsed_type_base& parsed) {
|
||||
auto _pmc = static_cast<const struct pmc_event_with_sample&>(parsed);
|
||||
auto data_processor = get_data_processor();
|
||||
auto track_primary_key = data_processor->insert_string(_pmc.track_name.c_str());
|
||||
|
||||
auto agent_primary_key =
|
||||
m_agent_manager
|
||||
.get_agent_by_id(_pmc.device_id,
|
||||
static_cast<agent_type>(_pmc.device_type))
|
||||
.base_id;
|
||||
|
||||
auto event_id = data_processor->insert_event(
|
||||
track_primary_key, _pmc.stack_id, _pmc.parent_stack_id, _pmc.correlation_id,
|
||||
_pmc.call_stack.c_str(), _pmc.line_info.c_str(), _pmc.event_metadata.c_str());
|
||||
data_processor->insert_sample(_pmc.track_name.c_str(), _pmc.timestamp_ns,
|
||||
event_id, "{}");
|
||||
|
||||
data_processor->insert_pmc_event(event_id, agent_primary_key,
|
||||
_pmc.pmc_info_name.c_str(), _pmc.value);
|
||||
};
|
||||
}
|
||||
|
||||
postprocessing_callback
|
||||
rocpd_post_processing::get_amd_smi_sample_callback() const
|
||||
{
|
||||
// Use the shared gpu_metrics_t from core/gpu_metrics.hpp
|
||||
using gpu_metrics_t = gpu::gpu_metrics_t;
|
||||
|
||||
return [&](const storage_parsed_type_base& parsed) {
|
||||
auto _amd_smi = static_cast<const struct amd_smi_sample&>(parsed);
|
||||
|
||||
auto data_processor = get_data_processor();
|
||||
|
||||
const auto* _name = trait::name<category::amd_smi>::value;
|
||||
auto name_primary_key = data_processor->insert_string(_name);
|
||||
auto event_id = data_processor->insert_event(name_primary_key, 0, 0, 0);
|
||||
|
||||
auto base_id =
|
||||
m_agent_manager.get_agent_by_type_index(_amd_smi.device_id, agent_type::GPU)
|
||||
.base_id;
|
||||
|
||||
auto insert_event_and_sample = [&](bool enabled, const char* pmc_name,
|
||||
const char* track_name, double value) {
|
||||
if(!enabled) return;
|
||||
data_processor->insert_pmc_event(event_id, base_id, pmc_name, value);
|
||||
data_processor->insert_sample(track_name, _amd_smi.timestamp, event_id);
|
||||
};
|
||||
|
||||
using pos = trace_cache::amd_smi_sample::settings_positions;
|
||||
std::bitset<8> settings_bits(_amd_smi.settings);
|
||||
bool is_busy_enabled = settings_bits.test(static_cast<int>(pos::busy));
|
||||
bool is_temp_enabled = settings_bits.test(static_cast<int>(pos::temp));
|
||||
bool is_power_enabled = settings_bits.test(static_cast<int>(pos::power));
|
||||
bool is_mem_usage_enabled = settings_bits.test(static_cast<int>(pos::mem_usage));
|
||||
|
||||
bool is_vcn_enabled = settings_bits.test(static_cast<int>(pos::vcn_activity));
|
||||
bool is_jpeg_enabled = settings_bits.test(static_cast<int>(pos::jpeg_activity));
|
||||
bool is_xgmi_enabled = settings_bits.test(static_cast<int>(pos::xgmi));
|
||||
bool is_pcie_enabled = settings_bits.test(static_cast<int>(pos::pcie));
|
||||
|
||||
insert_event_and_sample(
|
||||
is_busy_enabled, trait::name<category::amd_smi_gfx_busy>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_gfx_busy>(_amd_smi.device_id)
|
||||
.c_str(),
|
||||
_amd_smi.gfx_activity);
|
||||
insert_event_and_sample(
|
||||
is_busy_enabled, trait::name<category::amd_smi_umc_busy>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_umc_busy>(_amd_smi.device_id)
|
||||
.c_str(),
|
||||
_amd_smi.umc_activity);
|
||||
insert_event_and_sample(
|
||||
is_busy_enabled, trait::name<category::amd_smi_mm_busy>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_mm_busy>(_amd_smi.device_id)
|
||||
.c_str(),
|
||||
_amd_smi.mm_activity);
|
||||
insert_event_and_sample(
|
||||
is_temp_enabled, trait::name<category::amd_smi_temp>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_temp>(_amd_smi.device_id)
|
||||
.c_str(),
|
||||
_amd_smi.temperature);
|
||||
|
||||
insert_event_and_sample(
|
||||
is_power_enabled, trait::name<category::amd_smi_power>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_power>(_amd_smi.device_id)
|
||||
.c_str(),
|
||||
_amd_smi.power);
|
||||
insert_event_and_sample(
|
||||
is_mem_usage_enabled, trait::name<category::amd_smi_memory_usage>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_memory_usage>(
|
||||
_amd_smi.device_id)
|
||||
.c_str(),
|
||||
_amd_smi.mem_usage);
|
||||
|
||||
if(!is_vcn_enabled && !is_jpeg_enabled && !is_xgmi_enabled && !is_pcie_enabled)
|
||||
return;
|
||||
|
||||
gpu_metrics_t gpu_metrics;
|
||||
gpu::gpu_metrics_capabilities_t capabilities;
|
||||
gpu::deserialize_gpu_metrics(_amd_smi.gpu_activity, gpu_metrics, is_vcn_enabled,
|
||||
is_jpeg_enabled, is_xgmi_enabled, is_pcie_enabled,
|
||||
capabilities);
|
||||
|
||||
// Insert VCN and JPEG activity metrics
|
||||
auto insert_decode_vector_metrics = [&](auto category, bool _is_enabled,
|
||||
const std::vector<uint16_t>& data,
|
||||
std::optional<size_t> _idx =
|
||||
std::nullopt) {
|
||||
if(!_is_enabled) return;
|
||||
|
||||
using Category = std::decay_t<decltype(category)>;
|
||||
|
||||
for(size_t i = 0; i < data.size(); ++i)
|
||||
{
|
||||
const auto value = data[i];
|
||||
if(value == std::numeric_limits<uint16_t>::max()) continue;
|
||||
|
||||
auto pmc_name = info::annotate_category<Category>(_idx, i);
|
||||
auto track_name =
|
||||
info::annotate_with_device_id<Category>(_amd_smi.device_id, _idx, i);
|
||||
|
||||
insert_event_and_sample(_is_enabled, pmc_name.c_str(), track_name.c_str(),
|
||||
static_cast<double>(value));
|
||||
}
|
||||
};
|
||||
|
||||
// Insert XGMI read/write data metrics
|
||||
auto insert_xgmi_vector_metrics = [&](auto category, bool _is_enabled,
|
||||
const std::vector<uint64_t>& data,
|
||||
std::optional<size_t> _idx = std::nullopt) {
|
||||
if(!_is_enabled) return;
|
||||
|
||||
using Category = std::decay_t<decltype(category)>;
|
||||
|
||||
for(size_t i = 0; i < data.size(); ++i)
|
||||
{
|
||||
const auto value = data[i];
|
||||
if(value == std::numeric_limits<uint64_t>::max()) continue;
|
||||
|
||||
auto pmc_name = info::annotate_category<Category>(_idx, i);
|
||||
auto track_name =
|
||||
info::annotate_with_device_id<Category>(_amd_smi.device_id, _idx, i);
|
||||
|
||||
insert_event_and_sample(_is_enabled, pmc_name.c_str(), track_name.c_str(),
|
||||
static_cast<double>(value));
|
||||
}
|
||||
};
|
||||
|
||||
// Insert VCN activity metrics
|
||||
if(capabilities.flags.vcn_is_device_level_only)
|
||||
{
|
||||
// Device-level: use vcn_activity vector
|
||||
insert_decode_vector_metrics(category::amd_smi_vcn_activity{}, is_vcn_enabled,
|
||||
gpu_metrics.vcn_activity, std::nullopt);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Per-XCP: iterate through actual XCPs in vcn_busy
|
||||
for(size_t xcp = 0; xcp < gpu_metrics.vcn_busy.size(); ++xcp)
|
||||
{
|
||||
insert_decode_vector_metrics(category::amd_smi_vcn_activity{},
|
||||
is_vcn_enabled, gpu_metrics.vcn_busy[xcp],
|
||||
xcp);
|
||||
}
|
||||
}
|
||||
|
||||
// Insert JPEG activity metrics
|
||||
if(capabilities.flags.jpeg_is_device_level_only)
|
||||
{
|
||||
// Device-level: use jpeg_activity vector
|
||||
insert_decode_vector_metrics(category::amd_smi_jpeg_activity{},
|
||||
is_jpeg_enabled, gpu_metrics.jpeg_activity,
|
||||
std::nullopt);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Per-XCP: iterate through actual XCPs in jpeg_busy
|
||||
for(size_t xcp = 0; xcp < gpu_metrics.jpeg_busy.size(); ++xcp)
|
||||
{
|
||||
insert_decode_vector_metrics(category::amd_smi_jpeg_activity{},
|
||||
is_jpeg_enabled, gpu_metrics.jpeg_busy[xcp],
|
||||
xcp);
|
||||
}
|
||||
}
|
||||
|
||||
// Insert XGMI metrics (scalar values)
|
||||
insert_event_and_sample(
|
||||
is_xgmi_enabled, trait::name<category::amd_smi_xgmi_link_width>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_xgmi_link_width>(
|
||||
_amd_smi.device_id)
|
||||
.c_str(),
|
||||
gpu_metrics.xgmi_link_width);
|
||||
|
||||
insert_event_and_sample(
|
||||
is_xgmi_enabled, trait::name<category::amd_smi_xgmi_link_speed>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_xgmi_link_speed>(
|
||||
_amd_smi.device_id)
|
||||
.c_str(),
|
||||
gpu_metrics.xgmi_link_speed);
|
||||
|
||||
insert_xgmi_vector_metrics(category::amd_smi_xgmi_read_data{}, is_xgmi_enabled,
|
||||
gpu_metrics.xgmi_read_data_acc, std::nullopt);
|
||||
|
||||
insert_xgmi_vector_metrics(category::amd_smi_xgmi_write_data{}, is_xgmi_enabled,
|
||||
gpu_metrics.xgmi_write_data_acc, std::nullopt);
|
||||
|
||||
insert_event_and_sample(
|
||||
is_pcie_enabled, trait::name<category::amd_smi_pcie_link_width>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_pcie_link_width>(
|
||||
_amd_smi.device_id)
|
||||
.c_str(),
|
||||
gpu_metrics.pcie_link_width);
|
||||
|
||||
insert_event_and_sample(
|
||||
is_pcie_enabled, trait::name<category::amd_smi_pcie_link_speed>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_pcie_link_speed>(
|
||||
_amd_smi.device_id)
|
||||
.c_str(),
|
||||
gpu_metrics.pcie_link_speed);
|
||||
|
||||
insert_event_and_sample(
|
||||
is_pcie_enabled, trait::name<category::amd_smi_pcie_bandwidth_acc>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_pcie_bandwidth_acc>(
|
||||
_amd_smi.device_id)
|
||||
.c_str(),
|
||||
static_cast<double>(gpu_metrics.pcie_bandwidth_acc));
|
||||
|
||||
insert_event_and_sample(
|
||||
is_pcie_enabled, trait::name<category::amd_smi_pcie_bandwidth_inst>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_pcie_bandwidth_inst>(
|
||||
_amd_smi.device_id)
|
||||
.c_str(),
|
||||
static_cast<double>(gpu_metrics.pcie_bandwidth_inst));
|
||||
};
|
||||
}
|
||||
|
||||
postprocessing_callback
|
||||
rocpd_post_processing::get_cpu_freq_sample_callback() const
|
||||
{
|
||||
struct core_freq_sample
|
||||
{
|
||||
size_t id;
|
||||
float value;
|
||||
};
|
||||
|
||||
auto deserialize_freqs = [](std::vector<uint8_t>& buffer) {
|
||||
std::vector<core_freq_sample> result;
|
||||
size_t offset = 0;
|
||||
|
||||
while(offset + sizeof(float) + sizeof(size_t) <= buffer.size())
|
||||
{
|
||||
core_freq_sample core_sample;
|
||||
std::memcpy(&core_sample.id, buffer.data() + offset, sizeof(size_t));
|
||||
offset += sizeof(size_t);
|
||||
std::memcpy(&core_sample.value, buffer.data() + offset, sizeof(float));
|
||||
offset += sizeof(float);
|
||||
result.push_back(core_sample);
|
||||
}
|
||||
return result;
|
||||
};
|
||||
|
||||
return [&](const storage_parsed_type_base& parsed) {
|
||||
auto _cpu_freq_sample = static_cast<const struct cpu_freq_sample&>(parsed);
|
||||
|
||||
auto data_processor = get_data_processor();
|
||||
const auto* _name = trait::name<category::cpu_freq>::value;
|
||||
auto name_primary_key = data_processor->insert_string(_name);
|
||||
auto event_id = data_processor->insert_event(name_primary_key, 0, 0, 0);
|
||||
|
||||
auto device_id = 0;
|
||||
|
||||
auto base_id =
|
||||
m_agent_manager.get_agent_by_type_index(device_id, agent_type::CPU).base_id;
|
||||
|
||||
auto insert_event_and_sample = [&](const char* name, double value) {
|
||||
data_processor->insert_pmc_event(event_id, base_id, name, value);
|
||||
data_processor->insert_sample(name, _cpu_freq_sample.timestamp, event_id);
|
||||
};
|
||||
|
||||
insert_event_and_sample(trait::name<category::process_page>::value,
|
||||
_cpu_freq_sample.page_rss);
|
||||
insert_event_and_sample(trait::name<category::process_virt>::value,
|
||||
_cpu_freq_sample.virt_mem_usage);
|
||||
insert_event_and_sample(trait::name<category::process_peak>::value,
|
||||
_cpu_freq_sample.peak_rss);
|
||||
insert_event_and_sample(trait::name<category::process_context_switch>::value,
|
||||
_cpu_freq_sample.context_switch_count);
|
||||
insert_event_and_sample(trait::name<category::process_page_fault>::value,
|
||||
_cpu_freq_sample.page_faults);
|
||||
insert_event_and_sample(trait::name<category::process_user_mode_time>::value,
|
||||
_cpu_freq_sample.user_mode_time);
|
||||
insert_event_and_sample(trait::name<category::process_kernel_mode_time>::value,
|
||||
_cpu_freq_sample.kernel_mode_time);
|
||||
|
||||
auto get_track_name = [](const auto& cpu_id) {
|
||||
return std::string(trait::name<category::cpu_freq>::value) + " [" +
|
||||
std::to_string(cpu_id) + "]";
|
||||
};
|
||||
|
||||
auto core_freq_samples = deserialize_freqs(_cpu_freq_sample.freqs);
|
||||
for(const auto& core : core_freq_samples)
|
||||
{
|
||||
insert_event_and_sample(get_track_name(core.id).c_str(), core.value);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
rocpd_post_processing::rocpd_post_processing(metadata_registry& md,
|
||||
agent_manager& agent_mngr, int pid, int ppid)
|
||||
: m_metadata(md)
|
||||
, m_agent_manager(agent_mngr)
|
||||
, m_data_processor(std::make_shared<rocpd::data_processor>(
|
||||
std::make_shared<rocpd::data_storage::database>(pid, ppid)))
|
||||
{}
|
||||
|
||||
void
|
||||
rocpd_post_processing::register_parser_callback([[maybe_unused]] storage_parser& parser)
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
if(!get_use_rocpd())
|
||||
{
|
||||
return;
|
||||
}
|
||||
parser.register_type_callback(entry_type::region, get_region_callback());
|
||||
parser.register_type_callback(entry_type::kernel_dispatch,
|
||||
get_kernel_dispatch_callback());
|
||||
parser.register_type_callback(entry_type::memory_copy, get_memory_copy_callback());
|
||||
# if(ROCPROFILER_VERSION >= 600)
|
||||
parser.register_type_callback(entry_type::memory_alloc,
|
||||
get_memory_allocate_callback());
|
||||
# endif
|
||||
parser.register_type_callback(entry_type::in_time_sample,
|
||||
get_in_time_sample_callback());
|
||||
parser.register_type_callback(entry_type::pmc_event_with_sample,
|
||||
get_pmc_event_with_sample_callback());
|
||||
parser.register_type_callback(entry_type::amd_smi_sample,
|
||||
get_amd_smi_sample_callback());
|
||||
parser.register_type_callback(entry_type::cpu_freq_sample,
|
||||
get_cpu_freq_sample_callback());
|
||||
parser.register_type_callback(entry_type::backtrace_region_sample,
|
||||
get_backtrace_sample_callback());
|
||||
ROCPROFSYS_DEBUG("Buffer parser callbacks are registered..\n");
|
||||
|
||||
parser.register_on_finished_callback(
|
||||
std::make_unique<std::function<void()>>([this]() {
|
||||
if(m_data_processor != nullptr)
|
||||
{
|
||||
m_data_processor->flush();
|
||||
}
|
||||
}));
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_post_processing::post_process_metadata()
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
if(!get_use_rocpd())
|
||||
{
|
||||
return;
|
||||
}
|
||||
ROCPROFSYS_DEBUG("Post processing metadata..\n");
|
||||
auto data_processor = get_data_processor();
|
||||
auto n_info = node_info::get_instance();
|
||||
|
||||
data_processor->insert_node_info(n_info.id, n_info.hash, n_info.machine_id.c_str(),
|
||||
n_info.system_name.c_str(), n_info.node_name.c_str(),
|
||||
n_info.release.c_str(), n_info.version.c_str(),
|
||||
n_info.machine.c_str(), n_info.domain_name.c_str());
|
||||
|
||||
auto process_info = m_metadata.get_process_info();
|
||||
data_processor->insert_process_info(n_info.id, process_info.ppid, process_info.pid, 0,
|
||||
0, process_info.start, process_info.end,
|
||||
process_info.command.c_str(), "{}");
|
||||
|
||||
const auto& agents = m_agent_manager.get_agents();
|
||||
int counter = 0;
|
||||
for(const auto& rocpd_agent : agents)
|
||||
{
|
||||
auto _base_id = data_processor->insert_agent(
|
||||
n_info.id, process_info.pid,
|
||||
((rocpd_agent->type == agent_type::GPU) ? "GPU" : "CPU"), counter++,
|
||||
rocpd_agent->logical_node_id, rocpd_agent->logical_node_type_id,
|
||||
rocpd_agent->device_id, rocpd_agent->name.c_str(),
|
||||
rocpd_agent->model_name.c_str(), rocpd_agent->vendor_name.c_str(),
|
||||
rocpd_agent->product_name.c_str(), "");
|
||||
rocpd_agent->base_id = _base_id;
|
||||
}
|
||||
auto _string_list = m_metadata.get_string_list();
|
||||
for(auto& _string : _string_list)
|
||||
{
|
||||
data_processor->insert_string(std::string(_string).c_str());
|
||||
}
|
||||
|
||||
auto _thread_info_list = m_metadata.get_thread_info_list();
|
||||
for(auto& t_info : _thread_info_list)
|
||||
{
|
||||
rocpd_insert_thread_id(t_info, n_info, process_info);
|
||||
}
|
||||
|
||||
auto _track_info_list = m_metadata.get_track_info_list();
|
||||
for(auto& track : _track_info_list)
|
||||
{
|
||||
auto thread_id =
|
||||
track.thread_id.has_value()
|
||||
? std::make_optional<size_t>(data_processor->map_thread_id_to_primary_key(
|
||||
track.thread_id.value()))
|
||||
: std::nullopt;
|
||||
data_processor->insert_track(track.track_name.c_str(), n_info.id,
|
||||
process_info.pid, thread_id);
|
||||
}
|
||||
|
||||
auto _code_object_list = m_metadata.get_code_object_list();
|
||||
for(const auto& code_object : _code_object_list)
|
||||
{
|
||||
auto dev_id =
|
||||
m_agent_manager.get_agent_by_handle(get_handle_from_code_object(code_object))
|
||||
.base_id;
|
||||
|
||||
const char* strg_type = "UNKNOWN";
|
||||
switch(code_object.storage_type)
|
||||
{
|
||||
case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE: strg_type = "FILE"; break;
|
||||
case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY: strg_type = "MEMORY"; break;
|
||||
default: break;
|
||||
}
|
||||
data_processor->insert_code_object(code_object.code_object_id, n_info.id,
|
||||
process_info.pid, dev_id, code_object.uri,
|
||||
code_object.load_base, code_object.load_size,
|
||||
code_object.load_delta, strg_type);
|
||||
}
|
||||
|
||||
auto _kernel_symbols_list = m_metadata.get_kernel_symbol_list();
|
||||
for(const auto& kernel_symbol : _kernel_symbols_list)
|
||||
{
|
||||
auto kernel_name = tim::demangle(kernel_symbol.kernel_name);
|
||||
data_processor->insert_kernel_symbol(
|
||||
kernel_symbol.kernel_id, n_info.id, process_info.pid,
|
||||
kernel_symbol.code_object_id, kernel_symbol.kernel_name, kernel_name.c_str(),
|
||||
kernel_symbol.kernel_object, kernel_symbol.kernarg_segment_size,
|
||||
kernel_symbol.kernarg_segment_alignment, kernel_symbol.group_segment_size,
|
||||
kernel_symbol.private_segment_size, kernel_symbol.sgpr_count,
|
||||
kernel_symbol.arch_vgpr_count, kernel_symbol.accum_vgpr_count);
|
||||
|
||||
data_processor->insert_string(kernel_name.c_str());
|
||||
}
|
||||
|
||||
auto _queue_list = m_metadata.get_queue_list();
|
||||
for(const auto& queue_handle : _queue_list)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << "Queue " << queue_handle;
|
||||
data_processor->insert_queue_info(queue_handle, n_info.id, process_info.pid,
|
||||
ss.str().c_str());
|
||||
}
|
||||
|
||||
auto _stream_list = m_metadata.get_stream_list();
|
||||
for(const auto& stream_handle : _stream_list)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << "Stream " << stream_handle;
|
||||
data_processor->insert_stream_info(stream_handle, n_info.id, process_info.pid,
|
||||
ss.str().c_str());
|
||||
}
|
||||
|
||||
auto buffer_info_list = m_metadata.get_buffer_name_info();
|
||||
for(const auto& buffer_info : buffer_info_list)
|
||||
{
|
||||
for(const auto& item : buffer_info.items())
|
||||
{
|
||||
data_processor->insert_string(*item.second);
|
||||
}
|
||||
}
|
||||
|
||||
auto callback_info_list = m_metadata.get_callback_tracing_info();
|
||||
for(const auto& cb_info : callback_info_list)
|
||||
{
|
||||
for(const auto& item : cb_info.items())
|
||||
{
|
||||
data_processor->insert_string(*item.second);
|
||||
}
|
||||
}
|
||||
|
||||
auto pmc_info_list = m_metadata.get_pmc_info_list();
|
||||
for(const auto& pmc_info : pmc_info_list)
|
||||
{
|
||||
const auto agent_primary_key =
|
||||
m_agent_manager
|
||||
.get_agent_by_type_index(pmc_info.agent_type_index, pmc_info.type)
|
||||
.base_id;
|
||||
|
||||
data_processor->insert_pmc_description(
|
||||
n_info.id, process_info.pid, agent_primary_key, pmc_info.target_arch.c_str(),
|
||||
pmc_info.event_code, pmc_info.instance_id, pmc_info.name.c_str(),
|
||||
pmc_info.symbol.c_str(), pmc_info.description.c_str(),
|
||||
pmc_info.long_description.c_str(), pmc_info.component.c_str(),
|
||||
pmc_info.units.c_str(), pmc_info.value_type.c_str(), pmc_info.block.c_str(),
|
||||
pmc_info.expression.c_str(), pmc_info.is_constant, pmc_info.is_derived);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void
|
||||
rocpd_post_processing::rocpd_insert_thread_id(info::thread& t_info,
|
||||
const node_info& n_info,
|
||||
const info::process& process_info) const
|
||||
{
|
||||
const auto& extended_info = thread_info::get(t_info.thread_id, SystemTID);
|
||||
if(extended_info.has_value())
|
||||
{
|
||||
t_info.start = extended_info->get_start();
|
||||
t_info.end = extended_info->get_stop();
|
||||
}
|
||||
|
||||
std::stringstream ss;
|
||||
ss << "Thread " << t_info.thread_id;
|
||||
get_data_processor()->insert_thread_info(n_info.id, process_info.ppid,
|
||||
process_info.pid, t_info.thread_id,
|
||||
ss.str().c_str(), t_info.start, t_info.end);
|
||||
}
|
||||
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
@@ -0,0 +1,827 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "core/trace_cache/rocpd_processor.hpp"
|
||||
#include "core/agent_manager.hpp"
|
||||
#include "core/config.hpp"
|
||||
#include "core/debug.hpp"
|
||||
#include "core/gpu_metrics.hpp"
|
||||
#include "core/node_info.hpp"
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "core/rocpd/data_storage/database.hpp"
|
||||
#include "core/trace_cache/metadata_registry.hpp"
|
||||
#include "core/trace_cache/sample_type.hpp"
|
||||
#include "library/thread_info.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <timemory/utility/demangle.hpp>
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
# include "library/rocprofiler-sdk/fwd.hpp"
|
||||
# include <rocprofiler-sdk/context.h>
|
||||
# include <rocprofiler-sdk/version.h>
|
||||
#endif
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
namespace
|
||||
{
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto
|
||||
get_handle_from_code_object(
|
||||
const rocprofiler_callback_tracing_code_object_load_data_t& code_object)
|
||||
{
|
||||
# if(ROCPROFILER_VERSION >= 600)
|
||||
return code_object.agent_id.handle;
|
||||
# else
|
||||
return code_object.rocp_agent.handle;
|
||||
# endif
|
||||
}
|
||||
#endif
|
||||
} // namespace
|
||||
|
||||
void
|
||||
rocpd_processor_t::handle([[maybe_unused]] const kernel_dispatch_sample& _kds)
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto process = m_metadata->get_process_info();
|
||||
auto agent_primary_key =
|
||||
m_agent_manager->get_agent_by_handle(_kds.agent_id_handle).base_id;
|
||||
|
||||
auto thread_primary_key =
|
||||
m_data_processor->map_thread_id_to_primary_key(_kds.thread_id);
|
||||
|
||||
auto category_id = m_data_processor->insert_string(
|
||||
trait::name<category::rocm_kernel_dispatch>::value);
|
||||
|
||||
auto kernel_symbol = m_metadata->get_kernel_symbol(_kds.kernel_id);
|
||||
|
||||
if(!kernel_symbol.has_value())
|
||||
{
|
||||
throw std::runtime_error("Kernel symbol is missing for kernel dispatch");
|
||||
}
|
||||
|
||||
auto region_name_primary_key = m_data_processor->insert_string(
|
||||
tim::demangle(kernel_symbol->kernel_name).c_str());
|
||||
|
||||
auto stack_id = _kds.correlation_id_internal;
|
||||
auto parent_stack_id = _kds.correlation_id_ancestor;
|
||||
auto correlation_id = 0;
|
||||
|
||||
auto event_id = m_data_processor->insert_event(category_id, stack_id, parent_stack_id,
|
||||
correlation_id);
|
||||
|
||||
m_data_processor->insert_kernel_dispatch(
|
||||
n_info.id, process.pid, thread_primary_key, agent_primary_key, _kds.kernel_id,
|
||||
_kds.dispatch_id, _kds.queue_id_handle, _kds.stream_handle, _kds.start_timestamp,
|
||||
_kds.end_timestamp, _kds.private_segment_size, _kds.group_segment_size,
|
||||
_kds.workgroup_size_x, _kds.workgroup_size_y, _kds.workgroup_size_z,
|
||||
_kds.grid_size_x, _kds.grid_size_y, _kds.grid_size_z, region_name_primary_key,
|
||||
event_id);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_processor_t::handle([[maybe_unused]] const memory_copy_sample& _mcs)
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto process = m_metadata->get_process_info();
|
||||
|
||||
auto _name = std::string{ m_metadata->get_buffer_name_info().at(
|
||||
static_cast<rocprofiler_buffer_tracing_kind_t>(_mcs.kind),
|
||||
static_cast<rocprofiler_tracing_operation_t>(_mcs.operation)) };
|
||||
auto name_primary_key = m_data_processor->insert_string(_name.c_str());
|
||||
|
||||
auto category_primary_key =
|
||||
m_data_processor->insert_string(trait::name<category::rocm_memory_copy>::value);
|
||||
|
||||
auto thread_primary_key =
|
||||
m_data_processor->map_thread_id_to_primary_key(_mcs.thread_id);
|
||||
|
||||
auto dst_agent_primary_key =
|
||||
m_agent_manager->get_agent_by_handle(_mcs.dst_agent_id_handle).base_id;
|
||||
auto src_agent_primary_key =
|
||||
m_agent_manager->get_agent_by_handle(_mcs.src_agent_id_handle).base_id;
|
||||
|
||||
auto stack_id = _mcs.correlation_id_internal;
|
||||
auto parent_stack_id = _mcs.correlation_id_ancestor;
|
||||
auto correlation_id = 0;
|
||||
auto queue_id = 0;
|
||||
|
||||
auto event_primary_key = m_data_processor->insert_event(
|
||||
category_primary_key, stack_id, parent_stack_id, correlation_id);
|
||||
|
||||
m_data_processor->insert_memory_copy(
|
||||
n_info.id, process.pid, thread_primary_key, _mcs.start_timestamp,
|
||||
_mcs.end_timestamp, name_primary_key, dst_agent_primary_key,
|
||||
_mcs.dst_address_value, src_agent_primary_key, _mcs.src_address_value, _mcs.bytes,
|
||||
queue_id, _mcs.stream_handle, name_primary_key, event_primary_key);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_processor_t::handle([[maybe_unused]] const memory_allocate_sample& _mas)
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0 && (ROCPROFILER_VERSION >= 600)
|
||||
static auto memtype_to_db =
|
||||
[](std::string_view memory_type) -> std::pair<std::string, std::string> {
|
||||
constexpr auto MEMORY_PREFIX = std::string_view{ "MEMORY_ALLOCATION_" };
|
||||
constexpr auto SCRATCH_PREFIX = std::string_view{ "SCRATCH_MEMORY_" };
|
||||
constexpr auto VMEM_PREFIX = std::string_view{ "VMEM_" };
|
||||
constexpr auto ASYNC_PREFIX = std::string_view{ "ASYNC_" };
|
||||
|
||||
std::string _type;
|
||||
std::string _level;
|
||||
if(memory_type.find(MEMORY_PREFIX) == 0)
|
||||
{
|
||||
_type = memory_type.substr(MEMORY_PREFIX.length());
|
||||
if(_type.find(VMEM_PREFIX) == 0)
|
||||
{
|
||||
_type = _type.substr(VMEM_PREFIX.length());
|
||||
_level = "VIRTUAL";
|
||||
}
|
||||
else
|
||||
{
|
||||
_level = "REAL";
|
||||
}
|
||||
}
|
||||
else if(memory_type.find(SCRATCH_PREFIX) == 0)
|
||||
{
|
||||
_type = memory_type.substr(SCRATCH_PREFIX.length());
|
||||
_level = "SCRATCH";
|
||||
if(memory_type.find(ASYNC_PREFIX) == 0)
|
||||
{
|
||||
_type = memory_type.substr(ASYNC_PREFIX.length()); // RECLAIM
|
||||
}
|
||||
}
|
||||
|
||||
if(_type == "ALLOCATE")
|
||||
{
|
||||
_type = "ALLOC";
|
||||
}
|
||||
|
||||
return std::make_pair(_type, _level);
|
||||
};
|
||||
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto process = m_metadata->get_process_info();
|
||||
auto thread_primary_key =
|
||||
m_data_processor->map_thread_id_to_primary_key(_mas.thread_id);
|
||||
auto agent_primary_key = std::optional<uint64_t>{};
|
||||
|
||||
const auto invalid_context = ROCPROFILER_CONTEXT_NONE;
|
||||
if(_mas.agent_id_handle != invalid_context.handle)
|
||||
{
|
||||
{
|
||||
agent_primary_key =
|
||||
m_agent_manager->get_agent_by_handle(_mas.agent_id_handle).base_id;
|
||||
}
|
||||
const auto* _name = m_metadata->get_buffer_name_info().at(
|
||||
static_cast<rocprofiler_buffer_tracing_kind_t>(_mas.kind),
|
||||
static_cast<rocprofiler_tracing_operation_t>(_mas.operation));
|
||||
|
||||
auto [type, level] = memtype_to_db(_name);
|
||||
|
||||
auto stack_id = _mas.correlation_id_internal;
|
||||
auto parent_stack_id = _mas.correlation_id_ancestor;
|
||||
auto correlation_id = 0;
|
||||
auto queue_id = 0;
|
||||
|
||||
auto category_primary_key = m_data_processor->insert_string(
|
||||
trait::name<category::rocm_memory_allocate>::value);
|
||||
|
||||
auto event_primary_key = m_data_processor->insert_event(
|
||||
category_primary_key, stack_id, parent_stack_id, correlation_id);
|
||||
|
||||
m_data_processor->insert_memory_alloc(
|
||||
n_info.id, process.pid, thread_primary_key, agent_primary_key, type.c_str(),
|
||||
level.c_str(), _mas.start_timestamp, _mas.end_timestamp, _mas.address_value,
|
||||
_mas.allocation_size, queue_id, _mas.stream_handle, event_primary_key);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_processor_t::handle([[maybe_unused]] const region_sample& _rs)
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
static auto parse_args = []([[maybe_unused]] const std::string& arg_str) {
|
||||
rocprofiler_sdk::function_args_t args;
|
||||
const std::string delimiter = ";;";
|
||||
|
||||
auto split = [](const std::string& str, const std::string& _delimiter) {
|
||||
std::vector<std::string> tokens;
|
||||
size_t start = 0;
|
||||
size_t end = str.find(_delimiter);
|
||||
|
||||
while(end != std::string::npos)
|
||||
{
|
||||
tokens.push_back(str.substr(start, end - start));
|
||||
start = end + _delimiter.length();
|
||||
end = str.find(_delimiter, start);
|
||||
}
|
||||
|
||||
return tokens;
|
||||
};
|
||||
|
||||
if(arg_str.empty())
|
||||
{
|
||||
return args;
|
||||
}
|
||||
|
||||
auto tokens = split(arg_str, delimiter);
|
||||
|
||||
// Ensure the number of tokens is a multiple of 4
|
||||
if(tokens.size() % 4 != 0)
|
||||
{
|
||||
throw std::invalid_argument("Malformed argument string.");
|
||||
}
|
||||
|
||||
for(auto it = tokens.begin(); it != tokens.end(); it += 4)
|
||||
{
|
||||
rocprofiler_sdk::argument_info arg = { static_cast<uint32_t>(std::stoi(*it)),
|
||||
*(it + 1), *(it + 2), *(it + 3) };
|
||||
args.push_back(arg);
|
||||
}
|
||||
|
||||
return args;
|
||||
};
|
||||
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto process = m_metadata->get_process_info();
|
||||
auto thread_primary_key =
|
||||
m_data_processor->map_thread_id_to_primary_key(_rs.thread_id);
|
||||
|
||||
auto name_primary_key = m_data_processor->insert_string(_rs.name.c_str());
|
||||
auto category_primary_key = m_data_processor->insert_string(_rs.category.c_str());
|
||||
|
||||
size_t stack_id = _rs.correlation_id_internal;
|
||||
size_t parent_stack_id = _rs.correlation_id_ancestor;
|
||||
size_t correlation_id = 0;
|
||||
|
||||
auto event_primary_key =
|
||||
m_data_processor->insert_event(category_primary_key, stack_id, parent_stack_id,
|
||||
correlation_id, _rs.call_stack.c_str());
|
||||
|
||||
auto args = parse_args(_rs.args_str);
|
||||
for(const auto& arg : args)
|
||||
{
|
||||
m_data_processor->insert_args(event_primary_key, arg.arg_number,
|
||||
arg.arg_type.c_str(), arg.arg_name.c_str(),
|
||||
arg.arg_value.c_str());
|
||||
}
|
||||
|
||||
m_data_processor->insert_region(n_info.id, process.pid, thread_primary_key,
|
||||
_rs.start_timestamp, _rs.end_timestamp,
|
||||
name_primary_key, event_primary_key);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_processor_t::handle([[maybe_unused]] const backtrace_region_sample& _bts)
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto& n_info = node_info::get_instance();
|
||||
auto process = m_metadata->get_process_info();
|
||||
auto thread_primary_key =
|
||||
m_data_processor->map_thread_id_to_primary_key(_bts.thread_id);
|
||||
auto name_primary_key = m_data_processor->insert_string(_bts.name.c_str());
|
||||
auto category_primary_key = m_data_processor->insert_string(_bts.category.c_str());
|
||||
|
||||
auto event_primary_key = m_data_processor->insert_event(
|
||||
category_primary_key, 0, 0, 0, _bts.call_stack.c_str(), _bts.line_info.c_str(),
|
||||
_bts.extdata.c_str());
|
||||
|
||||
m_data_processor->insert_region(n_info.id, process.pid, thread_primary_key,
|
||||
_bts.start_timestamp, _bts.end_timestamp,
|
||||
name_primary_key, event_primary_key);
|
||||
m_data_processor->insert_sample(_bts.track_name.c_str(), _bts.start_timestamp,
|
||||
event_primary_key);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_processor_t::handle([[maybe_unused]] const in_time_sample& _its)
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto track_primary_key = m_data_processor->insert_string(_its.track_name.c_str());
|
||||
|
||||
auto event_id = m_data_processor->insert_event(
|
||||
track_primary_key, _its.stack_id, _its.parent_stack_id, _its.correlation_id,
|
||||
_its.call_stack.c_str(), _its.line_info.c_str(), _its.event_metadata.c_str());
|
||||
m_data_processor->insert_sample(_its.track_name.c_str(), _its.timestamp_ns, event_id,
|
||||
"{}");
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_processor_t::handle([[maybe_unused]] const pmc_event_with_sample& _pmc)
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
auto track_primary_key = m_data_processor->insert_string(_pmc.track_name.c_str());
|
||||
|
||||
auto agent_primary_key =
|
||||
m_agent_manager
|
||||
->get_agent_by_id(_pmc.device_id, static_cast<agent_type>(_pmc.device_type))
|
||||
.base_id;
|
||||
|
||||
auto event_id = m_data_processor->insert_event(
|
||||
track_primary_key, _pmc.stack_id, _pmc.parent_stack_id, _pmc.correlation_id,
|
||||
_pmc.call_stack.c_str(), _pmc.line_info.c_str(), _pmc.event_metadata.c_str());
|
||||
m_data_processor->insert_sample(_pmc.track_name.c_str(), _pmc.timestamp_ns, event_id,
|
||||
"{}");
|
||||
|
||||
m_data_processor->insert_pmc_event(event_id, agent_primary_key,
|
||||
_pmc.pmc_info_name.c_str(), _pmc.value);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_processor_t::handle([[maybe_unused]] const amd_smi_sample& _amd_smi)
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
|
||||
const auto* _name = trait::name<category::amd_smi>::value;
|
||||
auto name_primary_key = m_data_processor->insert_string(_name);
|
||||
auto event_id = m_data_processor->insert_event(name_primary_key, 0, 0, 0);
|
||||
|
||||
auto base_id =
|
||||
m_agent_manager->get_agent_by_type_index(_amd_smi.device_id, agent_type::GPU)
|
||||
.base_id;
|
||||
|
||||
auto insert_event_and_sample = [&](bool enabled, const char* pmc_name,
|
||||
const char* track_name, double value) {
|
||||
if(!enabled) return;
|
||||
m_data_processor->insert_pmc_event(event_id, base_id, pmc_name, value);
|
||||
m_data_processor->insert_sample(track_name, _amd_smi.timestamp, event_id);
|
||||
};
|
||||
|
||||
using pos = trace_cache::amd_smi_sample::settings_positions;
|
||||
std::bitset<8> settings_bits(_amd_smi.settings);
|
||||
bool is_busy_enabled = settings_bits.test(static_cast<int>(pos::busy));
|
||||
bool is_temp_enabled = settings_bits.test(static_cast<int>(pos::temp));
|
||||
bool is_power_enabled = settings_bits.test(static_cast<int>(pos::power));
|
||||
bool is_mem_usage_enabled = settings_bits.test(static_cast<int>(pos::mem_usage));
|
||||
|
||||
bool is_vcn_enabled = settings_bits.test(static_cast<int>(pos::vcn_activity));
|
||||
bool is_jpeg_enabled = settings_bits.test(static_cast<int>(pos::jpeg_activity));
|
||||
bool is_xgmi_enabled = settings_bits.test(static_cast<int>(pos::xgmi));
|
||||
bool is_pcie_enabled = settings_bits.test(static_cast<int>(pos::pcie));
|
||||
|
||||
insert_event_and_sample(
|
||||
is_busy_enabled, trait::name<category::amd_smi_gfx_busy>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_gfx_busy>(_amd_smi.device_id)
|
||||
.c_str(),
|
||||
_amd_smi.gfx_activity);
|
||||
insert_event_and_sample(
|
||||
is_busy_enabled, trait::name<category::amd_smi_umc_busy>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_umc_busy>(_amd_smi.device_id)
|
||||
.c_str(),
|
||||
_amd_smi.umc_activity);
|
||||
insert_event_and_sample(
|
||||
is_busy_enabled, trait::name<category::amd_smi_mm_busy>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_mm_busy>(_amd_smi.device_id)
|
||||
.c_str(),
|
||||
_amd_smi.mm_activity);
|
||||
insert_event_and_sample(
|
||||
is_temp_enabled, trait::name<category::amd_smi_temp>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_temp>(_amd_smi.device_id).c_str(),
|
||||
_amd_smi.temperature);
|
||||
|
||||
insert_event_and_sample(
|
||||
is_power_enabled, trait::name<category::amd_smi_power>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_power>(_amd_smi.device_id)
|
||||
.c_str(),
|
||||
_amd_smi.power);
|
||||
insert_event_and_sample(
|
||||
is_mem_usage_enabled, trait::name<category::amd_smi_memory_usage>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_memory_usage>(_amd_smi.device_id)
|
||||
.c_str(),
|
||||
_amd_smi.mem_usage);
|
||||
|
||||
if(!is_vcn_enabled && !is_jpeg_enabled && !is_xgmi_enabled && !is_pcie_enabled)
|
||||
return;
|
||||
|
||||
gpu::gpu_metrics_t gpu_metrics;
|
||||
gpu::gpu_metrics_capabilities_t capabilities;
|
||||
gpu::deserialize_gpu_metrics(_amd_smi.gpu_activity, gpu_metrics, is_vcn_enabled,
|
||||
is_jpeg_enabled, is_xgmi_enabled, is_pcie_enabled,
|
||||
capabilities);
|
||||
|
||||
// Insert VCN and JPEG activity metrics
|
||||
auto insert_decode_vector_metrics = [&](auto category, bool _is_enabled,
|
||||
const std::vector<uint16_t>& data,
|
||||
std::optional<size_t> _idx = std::nullopt) {
|
||||
if(!_is_enabled) return;
|
||||
|
||||
using Category = std::decay_t<decltype(category)>;
|
||||
|
||||
for(size_t i = 0; i < data.size(); ++i)
|
||||
{
|
||||
const auto value = data[i];
|
||||
if(value == std::numeric_limits<uint16_t>::max()) continue;
|
||||
|
||||
auto pmc_name = info::annotate_category<Category>(_idx, i);
|
||||
auto track_name =
|
||||
info::annotate_with_device_id<Category>(_amd_smi.device_id, _idx, i);
|
||||
|
||||
insert_event_and_sample(_is_enabled, pmc_name.c_str(), track_name.c_str(),
|
||||
static_cast<double>(value));
|
||||
}
|
||||
};
|
||||
|
||||
// Insert XGMI read/write data metrics
|
||||
auto insert_xgmi_vector_metrics = [&](auto category, bool _is_enabled,
|
||||
const std::vector<uint64_t>& data,
|
||||
std::optional<size_t> _idx = std::nullopt) {
|
||||
if(!_is_enabled) return;
|
||||
|
||||
using Category = std::decay_t<decltype(category)>;
|
||||
|
||||
for(size_t i = 0; i < data.size(); ++i)
|
||||
{
|
||||
const auto value = data[i];
|
||||
if(value == std::numeric_limits<uint64_t>::max()) continue;
|
||||
|
||||
auto pmc_name = info::annotate_category<Category>(_idx, i);
|
||||
auto track_name =
|
||||
info::annotate_with_device_id<Category>(_amd_smi.device_id, _idx, i);
|
||||
|
||||
insert_event_and_sample(_is_enabled, pmc_name.c_str(), track_name.c_str(),
|
||||
static_cast<double>(value));
|
||||
}
|
||||
};
|
||||
|
||||
// Insert VCN activity metrics
|
||||
if(capabilities.flags.vcn_is_device_level_only)
|
||||
{
|
||||
// Device-level: use vcn_activity vector
|
||||
insert_decode_vector_metrics(category::amd_smi_vcn_activity{}, is_vcn_enabled,
|
||||
gpu_metrics.vcn_activity, std::nullopt);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Per-XCP: iterate through actual XCPs in vcn_busy
|
||||
for(size_t xcp = 0; xcp < gpu_metrics.vcn_busy.size(); ++xcp)
|
||||
{
|
||||
insert_decode_vector_metrics(category::amd_smi_vcn_activity{}, is_vcn_enabled,
|
||||
gpu_metrics.vcn_busy[xcp], xcp);
|
||||
}
|
||||
}
|
||||
|
||||
// Insert JPEG activity metrics
|
||||
if(capabilities.flags.jpeg_is_device_level_only)
|
||||
{
|
||||
// Device-level: use jpeg_activity vector
|
||||
insert_decode_vector_metrics(category::amd_smi_jpeg_activity{}, is_jpeg_enabled,
|
||||
gpu_metrics.jpeg_activity, std::nullopt);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Per-XCP: iterate through actual XCPs in jpeg_busy
|
||||
for(size_t xcp = 0; xcp < gpu_metrics.jpeg_busy.size(); ++xcp)
|
||||
{
|
||||
insert_decode_vector_metrics(category::amd_smi_jpeg_activity{},
|
||||
is_jpeg_enabled, gpu_metrics.jpeg_busy[xcp],
|
||||
xcp);
|
||||
}
|
||||
}
|
||||
|
||||
// Insert XGMI metrics (scalar values)
|
||||
insert_event_and_sample(
|
||||
is_xgmi_enabled, trait::name<category::amd_smi_xgmi_link_width>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_xgmi_link_width>(
|
||||
_amd_smi.device_id)
|
||||
.c_str(),
|
||||
gpu_metrics.xgmi_link_width);
|
||||
|
||||
insert_event_and_sample(
|
||||
is_xgmi_enabled, trait::name<category::amd_smi_xgmi_link_speed>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_xgmi_link_speed>(
|
||||
_amd_smi.device_id)
|
||||
.c_str(),
|
||||
gpu_metrics.xgmi_link_speed);
|
||||
|
||||
insert_xgmi_vector_metrics(category::amd_smi_xgmi_read_data{}, is_xgmi_enabled,
|
||||
gpu_metrics.xgmi_read_data_acc, std::nullopt);
|
||||
|
||||
insert_xgmi_vector_metrics(category::amd_smi_xgmi_write_data{}, is_xgmi_enabled,
|
||||
gpu_metrics.xgmi_write_data_acc, std::nullopt);
|
||||
|
||||
insert_event_and_sample(
|
||||
is_pcie_enabled, trait::name<category::amd_smi_pcie_link_width>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_pcie_link_width>(
|
||||
_amd_smi.device_id)
|
||||
.c_str(),
|
||||
gpu_metrics.pcie_link_width);
|
||||
|
||||
insert_event_and_sample(
|
||||
is_pcie_enabled, trait::name<category::amd_smi_pcie_link_speed>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_pcie_link_speed>(
|
||||
_amd_smi.device_id)
|
||||
.c_str(),
|
||||
gpu_metrics.pcie_link_speed);
|
||||
|
||||
insert_event_and_sample(
|
||||
is_pcie_enabled, trait::name<category::amd_smi_pcie_bandwidth_acc>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_pcie_bandwidth_acc>(
|
||||
_amd_smi.device_id)
|
||||
.c_str(),
|
||||
static_cast<double>(gpu_metrics.pcie_bandwidth_acc));
|
||||
|
||||
insert_event_and_sample(
|
||||
is_pcie_enabled, trait::name<category::amd_smi_pcie_bandwidth_inst>::value,
|
||||
info::annotate_with_device_id<category::amd_smi_pcie_bandwidth_inst>(
|
||||
_amd_smi.device_id)
|
||||
.c_str(),
|
||||
static_cast<double>(gpu_metrics.pcie_bandwidth_inst));
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_processor_t::handle([[maybe_unused]] const cpu_freq_sample& _cpu_freq_sample)
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
struct core_freq_sample
|
||||
{
|
||||
size_t id;
|
||||
float value;
|
||||
};
|
||||
|
||||
auto deserialize_freqs = [](const std::vector<uint8_t>& buffer) {
|
||||
std::vector<core_freq_sample> result;
|
||||
size_t offset = 0;
|
||||
|
||||
while(offset + sizeof(float) + sizeof(size_t) <= buffer.size())
|
||||
{
|
||||
core_freq_sample core_sample;
|
||||
std::memcpy(&core_sample.id, buffer.data() + offset, sizeof(size_t));
|
||||
offset += sizeof(size_t);
|
||||
std::memcpy(&core_sample.value, buffer.data() + offset, sizeof(float));
|
||||
offset += sizeof(float);
|
||||
result.push_back(core_sample);
|
||||
}
|
||||
return result;
|
||||
};
|
||||
|
||||
const auto* _name = trait::name<category::cpu_freq>::value;
|
||||
auto name_primary_key = m_data_processor->insert_string(_name);
|
||||
auto event_id = m_data_processor->insert_event(name_primary_key, 0, 0, 0);
|
||||
|
||||
auto device_id = 0;
|
||||
|
||||
auto base_id =
|
||||
m_agent_manager->get_agent_by_type_index(device_id, agent_type::CPU).base_id;
|
||||
|
||||
auto insert_event_and_sample = [&](const char* name, double value) {
|
||||
m_data_processor->insert_pmc_event(event_id, base_id, name, value);
|
||||
m_data_processor->insert_sample(name, _cpu_freq_sample.timestamp, event_id);
|
||||
};
|
||||
|
||||
insert_event_and_sample(trait::name<category::process_page>::value,
|
||||
_cpu_freq_sample.page_rss);
|
||||
insert_event_and_sample(trait::name<category::process_virt>::value,
|
||||
_cpu_freq_sample.virt_mem_usage);
|
||||
insert_event_and_sample(trait::name<category::process_peak>::value,
|
||||
_cpu_freq_sample.peak_rss);
|
||||
insert_event_and_sample(trait::name<category::process_context_switch>::value,
|
||||
_cpu_freq_sample.context_switch_count);
|
||||
insert_event_and_sample(trait::name<category::process_page_fault>::value,
|
||||
_cpu_freq_sample.page_faults);
|
||||
insert_event_and_sample(trait::name<category::process_user_mode_time>::value,
|
||||
_cpu_freq_sample.user_mode_time);
|
||||
insert_event_and_sample(trait::name<category::process_kernel_mode_time>::value,
|
||||
_cpu_freq_sample.kernel_mode_time);
|
||||
|
||||
auto get_track_name = [](const auto& cpu_id) {
|
||||
return std::string(trait::name<category::cpu_freq>::value) + " [" +
|
||||
std::to_string(cpu_id) + "]";
|
||||
};
|
||||
|
||||
auto core_freq_samples = deserialize_freqs(_cpu_freq_sample.freqs);
|
||||
for(const auto& core : core_freq_samples)
|
||||
{
|
||||
insert_event_and_sample(get_track_name(core.id).c_str(), core.value);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
rocpd_processor_t::rocpd_processor_t(const std::shared_ptr<metadata_registry>& md,
|
||||
const std::shared_ptr<agent_manager>& agent_mngr,
|
||||
int pid, int ppid)
|
||||
: processor_t<rocpd_processor_t>()
|
||||
, m_metadata(md)
|
||||
, m_agent_manager(agent_mngr)
|
||||
, m_data_processor(std::make_shared<rocpd::data_processor>(
|
||||
std::make_shared<rocpd::data_storage::database>(pid, ppid)))
|
||||
{}
|
||||
|
||||
void
|
||||
rocpd_processor_t::prepare_for_processing()
|
||||
{
|
||||
post_process_metadata();
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_processor_t::finalize_processing()
|
||||
{
|
||||
m_data_processor->flush();
|
||||
}
|
||||
|
||||
void
|
||||
rocpd_processor_t::post_process_metadata()
|
||||
{
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
if(!get_use_rocpd())
|
||||
{
|
||||
return;
|
||||
}
|
||||
ROCPROFSYS_DEBUG("Post processing metadata..\n");
|
||||
auto n_info = node_info::get_instance();
|
||||
|
||||
m_data_processor->insert_node_info(
|
||||
n_info.id, n_info.hash, n_info.machine_id.c_str(), n_info.system_name.c_str(),
|
||||
n_info.node_name.c_str(), n_info.release.c_str(), n_info.version.c_str(),
|
||||
n_info.machine.c_str(), n_info.domain_name.c_str());
|
||||
|
||||
auto process_info = m_metadata->get_process_info();
|
||||
m_data_processor->insert_process_info(n_info.id, process_info.ppid, process_info.pid,
|
||||
0, 0, process_info.start, process_info.end,
|
||||
process_info.command.c_str(), "{}");
|
||||
|
||||
const auto& agents = m_agent_manager->get_agents();
|
||||
int counter = 0;
|
||||
for(const auto& rocpd_agent : agents)
|
||||
{
|
||||
auto _base_id = m_data_processor->insert_agent(
|
||||
n_info.id, process_info.pid,
|
||||
((rocpd_agent->type == agent_type::GPU) ? "GPU" : "CPU"), counter++,
|
||||
rocpd_agent->logical_node_id, rocpd_agent->logical_node_type_id,
|
||||
rocpd_agent->device_id, rocpd_agent->name.c_str(),
|
||||
rocpd_agent->model_name.c_str(), rocpd_agent->vendor_name.c_str(),
|
||||
rocpd_agent->product_name.c_str(), "");
|
||||
rocpd_agent->base_id = _base_id;
|
||||
}
|
||||
auto _string_list = m_metadata->get_string_list();
|
||||
for(auto& _string : _string_list)
|
||||
{
|
||||
m_data_processor->insert_string(std::string(_string).c_str());
|
||||
}
|
||||
|
||||
auto _thread_info_list = m_metadata->get_thread_info_list();
|
||||
for(auto& t_info : _thread_info_list)
|
||||
{
|
||||
insert_thread_id(t_info, n_info, process_info);
|
||||
}
|
||||
|
||||
auto _track_info_list = m_metadata->get_track_info_list();
|
||||
for(auto& track : _track_info_list)
|
||||
{
|
||||
auto thread_id = track.thread_id.has_value()
|
||||
? std::make_optional<size_t>(
|
||||
m_data_processor->map_thread_id_to_primary_key(
|
||||
track.thread_id.value()))
|
||||
: std::nullopt;
|
||||
m_data_processor->insert_track(track.track_name.c_str(), n_info.id,
|
||||
process_info.pid, thread_id);
|
||||
}
|
||||
|
||||
auto _code_object_list = m_metadata->get_code_object_list();
|
||||
for(const auto& code_object : _code_object_list)
|
||||
{
|
||||
auto dev_id =
|
||||
m_agent_manager->get_agent_by_handle(get_handle_from_code_object(code_object))
|
||||
.base_id;
|
||||
|
||||
const char* strg_type = "UNKNOWN";
|
||||
switch(code_object.storage_type)
|
||||
{
|
||||
case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_FILE: strg_type = "FILE"; break;
|
||||
case ROCPROFILER_CODE_OBJECT_STORAGE_TYPE_MEMORY: strg_type = "MEMORY"; break;
|
||||
default: break;
|
||||
}
|
||||
m_data_processor->insert_code_object(code_object.code_object_id, n_info.id,
|
||||
process_info.pid, dev_id, code_object.uri,
|
||||
code_object.load_base, code_object.load_size,
|
||||
code_object.load_delta, strg_type);
|
||||
}
|
||||
|
||||
auto _kernel_symbols_list = m_metadata->get_kernel_symbol_list();
|
||||
for(const auto& kernel_symbol : _kernel_symbols_list)
|
||||
{
|
||||
auto kernel_name = tim::demangle(kernel_symbol.kernel_name);
|
||||
m_data_processor->insert_kernel_symbol(
|
||||
kernel_symbol.kernel_id, n_info.id, process_info.pid,
|
||||
kernel_symbol.code_object_id, kernel_symbol.kernel_name, kernel_name.c_str(),
|
||||
kernel_symbol.kernel_object, kernel_symbol.kernarg_segment_size,
|
||||
kernel_symbol.kernarg_segment_alignment, kernel_symbol.group_segment_size,
|
||||
kernel_symbol.private_segment_size, kernel_symbol.sgpr_count,
|
||||
kernel_symbol.arch_vgpr_count, kernel_symbol.accum_vgpr_count);
|
||||
|
||||
m_data_processor->insert_string(kernel_name.c_str());
|
||||
}
|
||||
|
||||
auto _queue_list = m_metadata->get_queue_list();
|
||||
for(const auto& queue_handle : _queue_list)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << "Queue " << queue_handle;
|
||||
m_data_processor->insert_queue_info(queue_handle, n_info.id, process_info.pid,
|
||||
ss.str().c_str());
|
||||
}
|
||||
|
||||
auto _stream_list = m_metadata->get_stream_list();
|
||||
for(const auto& stream_handle : _stream_list)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << "Stream " << stream_handle;
|
||||
m_data_processor->insert_stream_info(stream_handle, n_info.id, process_info.pid,
|
||||
ss.str().c_str());
|
||||
}
|
||||
|
||||
auto buffer_info_list = m_metadata->get_buffer_name_info();
|
||||
for(const auto& buffer_info : buffer_info_list)
|
||||
{
|
||||
for(const auto& item : buffer_info.items())
|
||||
{
|
||||
m_data_processor->insert_string(*item.second);
|
||||
}
|
||||
}
|
||||
|
||||
auto callback_info_list = m_metadata->get_callback_tracing_info();
|
||||
for(const auto& cb_info : callback_info_list)
|
||||
{
|
||||
for(const auto& item : cb_info.items())
|
||||
{
|
||||
m_data_processor->insert_string(*item.second);
|
||||
}
|
||||
}
|
||||
|
||||
auto pmc_info_list = m_metadata->get_pmc_info_list();
|
||||
for(const auto& pmc_info : pmc_info_list)
|
||||
{
|
||||
const auto agent_primary_key =
|
||||
m_agent_manager
|
||||
->get_agent_by_type_index(pmc_info.agent_type_index, pmc_info.type)
|
||||
.base_id;
|
||||
|
||||
m_data_processor->insert_pmc_description(
|
||||
n_info.id, process_info.pid, agent_primary_key, pmc_info.target_arch.c_str(),
|
||||
pmc_info.event_code, pmc_info.instance_id, pmc_info.name.c_str(),
|
||||
pmc_info.symbol.c_str(), pmc_info.description.c_str(),
|
||||
pmc_info.long_description.c_str(), pmc_info.component.c_str(),
|
||||
pmc_info.units.c_str(), pmc_info.value_type.c_str(), pmc_info.block.c_str(),
|
||||
pmc_info.expression.c_str(), pmc_info.is_constant, pmc_info.is_derived);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void
|
||||
rocpd_processor_t::insert_thread_id(info::thread& t_info, const node_info& n_info,
|
||||
const info::process& process_info)
|
||||
{
|
||||
const auto& extended_info = thread_info::get(t_info.thread_id, SystemTID);
|
||||
if(extended_info.has_value())
|
||||
{
|
||||
t_info.start = extended_info->get_start();
|
||||
t_info.end = extended_info->get_stop();
|
||||
}
|
||||
|
||||
std::stringstream ss;
|
||||
ss << "Thread " << t_info.thread_id;
|
||||
m_data_processor->insert_thread_info(n_info.id, process_info.ppid, process_info.pid,
|
||||
t_info.thread_id, ss.str().c_str(), t_info.start,
|
||||
t_info.end);
|
||||
}
|
||||
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
+23
-23
@@ -25,44 +25,44 @@
|
||||
#include "core/node_info.hpp"
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "core/trace_cache/metadata_registry.hpp"
|
||||
#include "core/trace_cache/storage_parser.hpp"
|
||||
#include "core/trace_cache/sample_processor.hpp"
|
||||
|
||||
#include "trace_cache/sample_type.hpp"
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
class rocpd_post_processing
|
||||
class rocpd_processor_t : public processor_t<rocpd_processor_t>
|
||||
{
|
||||
public:
|
||||
rocpd_post_processing(metadata_registry& metadata, agent_manager& agent_mngr, int pid,
|
||||
int ppid);
|
||||
rocpd_processor_t(const std::shared_ptr<metadata_registry>& metadata,
|
||||
const std::shared_ptr<agent_manager>& agent_mngr, int pid,
|
||||
int ppid);
|
||||
|
||||
void register_parser_callback(storage_parser& parser);
|
||||
void post_process_metadata();
|
||||
void prepare_for_processing();
|
||||
void finalize_processing();
|
||||
|
||||
std::shared_ptr<rocpd::data_processor> get_data_processor() const;
|
||||
void handle(const kernel_dispatch_sample& sample);
|
||||
void handle(const memory_copy_sample& sample);
|
||||
void handle(const memory_allocate_sample& sample);
|
||||
void handle(const region_sample& sample);
|
||||
void handle(const in_time_sample& sample);
|
||||
void handle(const pmc_event_with_sample& sample);
|
||||
void handle(const amd_smi_sample& sample);
|
||||
void handle(const cpu_freq_sample& sample);
|
||||
void handle(const backtrace_region_sample& sample);
|
||||
|
||||
private:
|
||||
using primary_key = size_t;
|
||||
|
||||
inline void rocpd_insert_thread_id(info::thread& t_info, const node_info& n_info,
|
||||
const info::process& process_info) const;
|
||||
void post_process_metadata();
|
||||
inline void insert_thread_id(info::thread& t_info, const node_info& n_info,
|
||||
const info::process& process_info);
|
||||
|
||||
postprocessing_callback get_kernel_dispatch_callback() const;
|
||||
postprocessing_callback get_memory_copy_callback() const;
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
postprocessing_callback get_memory_allocate_callback() const;
|
||||
#endif
|
||||
postprocessing_callback get_region_callback() const;
|
||||
postprocessing_callback get_in_time_sample_callback() const;
|
||||
postprocessing_callback get_pmc_event_with_sample_callback() const;
|
||||
postprocessing_callback get_amd_smi_sample_callback() const;
|
||||
postprocessing_callback get_cpu_freq_sample_callback() const;
|
||||
postprocessing_callback get_backtrace_sample_callback() const;
|
||||
|
||||
metadata_registry& m_metadata;
|
||||
agent_manager& m_agent_manager;
|
||||
std::shared_ptr<metadata_registry> m_metadata;
|
||||
std::shared_ptr<agent_manager> m_agent_manager;
|
||||
std::shared_ptr<rocpd::data_processor> m_data_processor;
|
||||
};
|
||||
|
||||
@@ -0,0 +1,310 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include "core/trace_cache/cacheable.hpp"
|
||||
#include "core/trace_cache/sample_type.hpp"
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
# include <rocprofiler-sdk/version.h>
|
||||
#endif
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
struct processor_t
|
||||
{
|
||||
void handle(const kernel_dispatch_sample& sample)
|
||||
{
|
||||
static_cast<T*>(this)->handle(sample);
|
||||
}
|
||||
|
||||
void handle(const memory_copy_sample& sample)
|
||||
{
|
||||
static_cast<T*>(this)->handle(sample);
|
||||
}
|
||||
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
void handle(const memory_allocate_sample& sample)
|
||||
{
|
||||
static_cast<T*>(this)->handle(sample);
|
||||
}
|
||||
#endif
|
||||
|
||||
void handle(const region_sample& sample) { static_cast<T*>(this)->handle(sample); }
|
||||
|
||||
void handle(const in_time_sample& sample) { static_cast<T*>(this)->handle(sample); }
|
||||
|
||||
void handle(const pmc_event_with_sample& sample)
|
||||
{
|
||||
static_cast<T*>(this)->handle(sample);
|
||||
}
|
||||
|
||||
void handle(const amd_smi_sample& sample) { static_cast<T*>(this)->handle(sample); }
|
||||
|
||||
void handle(const cpu_freq_sample& sample) { static_cast<T*>(this)->handle(sample); }
|
||||
|
||||
void handle(const backtrace_region_sample& sample)
|
||||
{
|
||||
static_cast<T*>(this)->handle(sample);
|
||||
}
|
||||
|
||||
void prepare_for_processing() { static_cast<T*>(this)->prepare_for_processing(); }
|
||||
|
||||
void finalize_processing() { static_cast<T*>(this)->finalize_processing(); }
|
||||
|
||||
protected:
|
||||
~processor_t() = default;
|
||||
};
|
||||
|
||||
struct processor_view_t
|
||||
{
|
||||
using kernel_dispatch_fn_t = void (*)(void*, const kernel_dispatch_sample&) noexcept;
|
||||
using memory_copy_fn_t = void (*)(void*, const memory_copy_sample&) noexcept;
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
using memory_allocate_fn_t = void (*)(void*, const memory_allocate_sample&) noexcept;
|
||||
#endif
|
||||
using region_fn_t = void (*)(void*, const region_sample&) noexcept;
|
||||
using in_time_sample_fn_t = void (*)(void*, const in_time_sample&) noexcept;
|
||||
using pmc_event_fn_t = void (*)(void*, const pmc_event_with_sample&) noexcept;
|
||||
using amd_smi_sample_fn_t = void (*)(void*, const amd_smi_sample&) noexcept;
|
||||
using cpu_freq_sample_fn_t = void (*)(void*, const cpu_freq_sample&) noexcept;
|
||||
using backtrace_region_fn_t = void (*)(void*,
|
||||
const backtrace_region_sample&) noexcept;
|
||||
using prepare_for_processing_fn_t = void (*)(void*) noexcept;
|
||||
using finalize_processing_fn_t = void (*)(void*) noexcept;
|
||||
|
||||
struct vtable_t
|
||||
{
|
||||
kernel_dispatch_fn_t handle_kernel_dispatch;
|
||||
memory_copy_fn_t handle_memory_copy;
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
memory_allocate_fn_t handle_memory_allocate;
|
||||
#endif
|
||||
region_fn_t handle_region;
|
||||
in_time_sample_fn_t handle_in_time_sample;
|
||||
pmc_event_fn_t handle_pmc_event;
|
||||
amd_smi_sample_fn_t handle_amd_smi_sample;
|
||||
cpu_freq_sample_fn_t handle_cpu_freq_sample;
|
||||
backtrace_region_fn_t handle_backtrace_region;
|
||||
prepare_for_processing_fn_t prepare_for_processing;
|
||||
finalize_processing_fn_t finalize_processing;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
explicit processor_view_t(T& t) noexcept
|
||||
: m_object{ std::addressof(t) }
|
||||
, m_vtable{ std::addressof(get_vtable_for_type<T>()) }
|
||||
{
|
||||
static_assert(std::is_base_of<processor_t<T>, T>::value,
|
||||
"Type must be derived from processor_t<T>");
|
||||
}
|
||||
|
||||
processor_view_t(const processor_view_t&) noexcept = default;
|
||||
processor_view_t(processor_view_t&&) noexcept = default;
|
||||
processor_view_t& operator=(const processor_view_t&) noexcept = default;
|
||||
processor_view_t& operator=(processor_view_t&&) noexcept = default;
|
||||
|
||||
ROCPROFSYS_INLINE void handle(const kernel_dispatch_sample& sample) const noexcept
|
||||
{
|
||||
m_vtable->handle_kernel_dispatch(m_object, sample);
|
||||
}
|
||||
|
||||
ROCPROFSYS_INLINE void handle(const memory_copy_sample& sample) const noexcept
|
||||
{
|
||||
m_vtable->handle_memory_copy(m_object, sample);
|
||||
}
|
||||
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
ROCPROFSYS_INLINE void handle(const memory_allocate_sample& sample) const noexcept
|
||||
{
|
||||
m_vtable->handle_memory_allocate(m_object, sample);
|
||||
}
|
||||
#endif
|
||||
|
||||
ROCPROFSYS_INLINE void handle(const region_sample& sample) const noexcept
|
||||
{
|
||||
m_vtable->handle_region(m_object, sample);
|
||||
}
|
||||
|
||||
ROCPROFSYS_INLINE void handle(const in_time_sample& sample) const noexcept
|
||||
{
|
||||
m_vtable->handle_in_time_sample(m_object, sample);
|
||||
}
|
||||
|
||||
ROCPROFSYS_INLINE void handle(const pmc_event_with_sample& sample) const noexcept
|
||||
{
|
||||
m_vtable->handle_pmc_event(m_object, sample);
|
||||
}
|
||||
|
||||
ROCPROFSYS_INLINE void handle(const amd_smi_sample& sample) const noexcept
|
||||
{
|
||||
m_vtable->handle_amd_smi_sample(m_object, sample);
|
||||
}
|
||||
|
||||
ROCPROFSYS_INLINE void handle(const cpu_freq_sample& sample) const noexcept
|
||||
{
|
||||
m_vtable->handle_cpu_freq_sample(m_object, sample);
|
||||
}
|
||||
|
||||
ROCPROFSYS_INLINE void handle(const backtrace_region_sample& sample) const noexcept
|
||||
{
|
||||
m_vtable->handle_backtrace_region(m_object, sample);
|
||||
}
|
||||
|
||||
ROCPROFSYS_INLINE void prepare_for_processing() const noexcept
|
||||
{
|
||||
m_vtable->prepare_for_processing(m_object);
|
||||
}
|
||||
|
||||
ROCPROFSYS_INLINE void finalize_processing() const noexcept
|
||||
{
|
||||
m_vtable->finalize_processing(m_object);
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
static inline const vtable_t& get_vtable_for_type() noexcept
|
||||
{
|
||||
static const vtable_t vtable{
|
||||
+[](void* obj, const kernel_dispatch_sample& sample) noexcept {
|
||||
static_cast<T*>(obj)->handle(sample);
|
||||
},
|
||||
+[](void* obj, const memory_copy_sample& sample) noexcept {
|
||||
static_cast<T*>(obj)->handle(sample);
|
||||
},
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
+[](void* obj, const memory_allocate_sample& sample) noexcept {
|
||||
static_cast<T*>(obj)->handle(sample);
|
||||
},
|
||||
#endif
|
||||
+[](void* obj, const region_sample& sample) noexcept {
|
||||
static_cast<T*>(obj)->handle(sample);
|
||||
},
|
||||
+[](void* obj, const in_time_sample& sample) noexcept {
|
||||
static_cast<T*>(obj)->handle(sample);
|
||||
},
|
||||
+[](void* obj, const pmc_event_with_sample& sample) noexcept {
|
||||
static_cast<T*>(obj)->handle(sample);
|
||||
},
|
||||
+[](void* obj, const amd_smi_sample& sample) noexcept {
|
||||
static_cast<T*>(obj)->handle(sample);
|
||||
},
|
||||
+[](void* obj, const cpu_freq_sample& sample) noexcept {
|
||||
static_cast<T*>(obj)->handle(sample);
|
||||
},
|
||||
+[](void* obj, const backtrace_region_sample& sample) noexcept {
|
||||
static_cast<T*>(obj)->handle(sample);
|
||||
},
|
||||
+[](void* obj) noexcept { static_cast<T*>(obj)->prepare_for_processing(); },
|
||||
+[](void* obj) noexcept { static_cast<T*>(obj)->finalize_processing(); }
|
||||
};
|
||||
return vtable;
|
||||
}
|
||||
|
||||
void* m_object;
|
||||
const vtable_t* m_vtable;
|
||||
};
|
||||
|
||||
struct sample_processor_t
|
||||
{
|
||||
void clear_handlers() { m_processor_view_list.clear(); }
|
||||
|
||||
template <typename T>
|
||||
void add_handler(T& handler)
|
||||
{
|
||||
m_processor_view_list.emplace_back(handler);
|
||||
}
|
||||
|
||||
template <typename SampleType>
|
||||
ROCPROFSYS_INLINE void handle_sample(const SampleType& sample) const
|
||||
{
|
||||
for(const auto& view : m_processor_view_list)
|
||||
view.handle(sample);
|
||||
}
|
||||
|
||||
ROCPROFSYS_INLINE void prepare_for_processing() const noexcept
|
||||
{
|
||||
for(const auto& view : m_processor_view_list)
|
||||
view.prepare_for_processing();
|
||||
}
|
||||
|
||||
ROCPROFSYS_INLINE void finalize_processing() const noexcept
|
||||
{
|
||||
for(const auto& view : m_processor_view_list)
|
||||
view.finalize_processing();
|
||||
}
|
||||
|
||||
ROCPROFSYS_INLINE bool is_empty() const noexcept
|
||||
{
|
||||
return m_processor_view_list.empty();
|
||||
}
|
||||
|
||||
ROCPROFSYS_INLINE void execute_sample_processing(
|
||||
type_identifier_t type_identifier, const trace_cache::cacheable_t& sample) const
|
||||
{
|
||||
switch(type_identifier)
|
||||
{
|
||||
case type_identifier_t::region:
|
||||
handle_sample(static_cast<const region_sample&>(sample));
|
||||
break;
|
||||
case type_identifier_t::kernel_dispatch:
|
||||
handle_sample(static_cast<const kernel_dispatch_sample&>(sample));
|
||||
break;
|
||||
case type_identifier_t::memory_copy:
|
||||
handle_sample(static_cast<const memory_copy_sample&>(sample));
|
||||
break;
|
||||
#if ROCPROFILER_VERSION >= 600
|
||||
case type_identifier_t::memory_alloc:
|
||||
handle_sample(static_cast<const memory_allocate_sample&>(sample));
|
||||
break;
|
||||
#endif
|
||||
case type_identifier_t::in_time_sample:
|
||||
handle_sample(static_cast<const in_time_sample&>(sample));
|
||||
break;
|
||||
case type_identifier_t::pmc_event_with_sample:
|
||||
handle_sample(static_cast<const pmc_event_with_sample&>(sample));
|
||||
break;
|
||||
case type_identifier_t::amd_smi_sample:
|
||||
handle_sample(static_cast<const amd_smi_sample&>(sample));
|
||||
break;
|
||||
case type_identifier_t::cpu_freq_sample:
|
||||
handle_sample(static_cast<const cpu_freq_sample&>(sample));
|
||||
break;
|
||||
case type_identifier_t::backtrace_region_sample:
|
||||
handle_sample(static_cast<const backtrace_region_sample&>(sample));
|
||||
break;
|
||||
default: throw std::runtime_error("Unsupported sample type");
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<processor_view_t> m_processor_view_list;
|
||||
};
|
||||
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
@@ -21,6 +21,7 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include "core/trace_cache/cacheable.hpp"
|
||||
#include <cstdint>
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
@@ -28,36 +29,70 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#if ROCPROFSYS_USE_ROCM > 0
|
||||
# include <rocprofiler-sdk/version.h>
|
||||
#endif
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
struct storage_parsed_type_base
|
||||
{};
|
||||
|
||||
struct kernel_dispatch_sample : storage_parsed_type_base
|
||||
enum class type_identifier_t : uint32_t
|
||||
{
|
||||
// Timing fields
|
||||
in_time_sample = 0x0000,
|
||||
pmc_event_with_sample = 0x0001,
|
||||
region = 0x0002,
|
||||
kernel_dispatch = 0x0003,
|
||||
memory_copy = 0x0004,
|
||||
memory_alloc = 0x0005,
|
||||
amd_smi_sample = 0x0006,
|
||||
cpu_freq_sample = 0x0007,
|
||||
backtrace_region_sample = 0x0008,
|
||||
fragmented_space = 0xFFFF
|
||||
};
|
||||
|
||||
struct kernel_dispatch_sample : cacheable_t
|
||||
{
|
||||
static constexpr type_identifier_t type_identifier =
|
||||
type_identifier_t::kernel_dispatch;
|
||||
|
||||
kernel_dispatch_sample() = default;
|
||||
kernel_dispatch_sample(uint64_t _start_timestamp, uint64_t _end_timestamp,
|
||||
uint64_t _thread_id, uint64_t _agent_id_handle,
|
||||
uint64_t _kernel_id, uint64_t _dispatch_id,
|
||||
uint64_t _queue_id_handle, uint64_t _correlation_id_internal,
|
||||
uint64_t _correlation_id_ancestor,
|
||||
uint32_t _private_segment_size, uint32_t _group_segment_size,
|
||||
uint32_t _workgroup_size_x, uint32_t _workgroup_size_y,
|
||||
uint32_t _workgroup_size_z, uint32_t _grid_size_x,
|
||||
uint32_t _grid_size_y, uint32_t _grid_size_z,
|
||||
size_t _stream_handle)
|
||||
: start_timestamp(_start_timestamp)
|
||||
, end_timestamp(_end_timestamp)
|
||||
, thread_id(_thread_id)
|
||||
, agent_id_handle(_agent_id_handle)
|
||||
, kernel_id(_kernel_id)
|
||||
, dispatch_id(_dispatch_id)
|
||||
, queue_id_handle(_queue_id_handle)
|
||||
, correlation_id_internal(_correlation_id_internal)
|
||||
, correlation_id_ancestor(_correlation_id_ancestor)
|
||||
, private_segment_size(_private_segment_size)
|
||||
, group_segment_size(_group_segment_size)
|
||||
, workgroup_size_x(_workgroup_size_x)
|
||||
, workgroup_size_y(_workgroup_size_y)
|
||||
, workgroup_size_z(_workgroup_size_z)
|
||||
, grid_size_x(_grid_size_x)
|
||||
, grid_size_y(_grid_size_y)
|
||||
, grid_size_z(_grid_size_z)
|
||||
, stream_handle(_stream_handle)
|
||||
{}
|
||||
|
||||
uint64_t start_timestamp;
|
||||
uint64_t end_timestamp;
|
||||
|
||||
// Identification fields
|
||||
uint64_t thread_id;
|
||||
uint64_t agent_id_handle;
|
||||
uint64_t kernel_id;
|
||||
uint64_t dispatch_id;
|
||||
uint64_t queue_id_handle;
|
||||
|
||||
// Correlation fields
|
||||
uint64_t correlation_id_internal;
|
||||
uint64_t correlation_id_ancestor;
|
||||
|
||||
// Dispatch configuration
|
||||
uint32_t private_segment_size;
|
||||
uint32_t group_segment_size;
|
||||
uint32_t workgroup_size_x;
|
||||
@@ -66,69 +101,206 @@ struct kernel_dispatch_sample : storage_parsed_type_base
|
||||
uint32_t grid_size_x;
|
||||
uint32_t grid_size_y;
|
||||
uint32_t grid_size_z;
|
||||
|
||||
// Stream handle
|
||||
size_t stream_handle;
|
||||
size_t stream_handle;
|
||||
};
|
||||
|
||||
struct memory_copy_sample : storage_parsed_type_base
|
||||
template <>
|
||||
inline void
|
||||
serialize(uint8_t* buffer, const kernel_dispatch_sample& item)
|
||||
{
|
||||
// Timing fields
|
||||
utility::store_value(
|
||||
buffer, item.start_timestamp, item.end_timestamp, item.thread_id,
|
||||
item.agent_id_handle, item.kernel_id, item.dispatch_id, item.queue_id_handle,
|
||||
item.correlation_id_internal, item.correlation_id_ancestor,
|
||||
item.private_segment_size, item.group_segment_size, item.workgroup_size_x,
|
||||
item.workgroup_size_y, item.workgroup_size_z, item.grid_size_x, item.grid_size_y,
|
||||
item.grid_size_z, (uint64_t) item.stream_handle);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline kernel_dispatch_sample
|
||||
deserialize(uint8_t*& buffer)
|
||||
{
|
||||
kernel_dispatch_sample item;
|
||||
uint64_t stream_handle;
|
||||
utility::parse_value(buffer, item.start_timestamp, item.end_timestamp, item.thread_id,
|
||||
item.agent_id_handle, item.kernel_id, item.dispatch_id,
|
||||
item.queue_id_handle, item.correlation_id_internal,
|
||||
item.correlation_id_ancestor, item.private_segment_size,
|
||||
item.group_segment_size, item.workgroup_size_x,
|
||||
item.workgroup_size_y, item.workgroup_size_z, item.grid_size_x,
|
||||
item.grid_size_y, item.grid_size_z, stream_handle);
|
||||
item.stream_handle = stream_handle;
|
||||
return item;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline size_t
|
||||
get_size(const kernel_dispatch_sample& item)
|
||||
{
|
||||
return utility::get_size(
|
||||
item.start_timestamp, item.end_timestamp, item.thread_id, item.agent_id_handle,
|
||||
item.kernel_id, item.dispatch_id, item.queue_id_handle,
|
||||
item.correlation_id_internal, item.correlation_id_ancestor,
|
||||
item.private_segment_size, item.group_segment_size, item.workgroup_size_x,
|
||||
item.workgroup_size_y, item.workgroup_size_z, item.grid_size_x, item.grid_size_y,
|
||||
item.grid_size_z, (uint64_t) item.stream_handle);
|
||||
}
|
||||
|
||||
struct memory_copy_sample : cacheable_t
|
||||
{
|
||||
static constexpr type_identifier_t type_identifier = type_identifier_t::memory_copy;
|
||||
|
||||
memory_copy_sample() = default;
|
||||
memory_copy_sample(uint64_t _start_timestamp, uint64_t _end_timestamp,
|
||||
uint64_t _thread_id, uint64_t _dst_agent_id_handle,
|
||||
uint64_t _src_agent_id_handle, int32_t _kind, int32_t _operation,
|
||||
uint64_t _bytes, uint64_t _correlation_id_internal,
|
||||
uint64_t _correlation_id_ancestor, uint64_t _dst_address_value,
|
||||
uint64_t _src_address_value, size_t _stream_handle)
|
||||
: start_timestamp(_start_timestamp)
|
||||
, end_timestamp(_end_timestamp)
|
||||
, thread_id(_thread_id)
|
||||
, dst_agent_id_handle(_dst_agent_id_handle)
|
||||
, src_agent_id_handle(_src_agent_id_handle)
|
||||
, kind(_kind)
|
||||
, operation(_operation)
|
||||
, bytes(_bytes)
|
||||
, correlation_id_internal(_correlation_id_internal)
|
||||
, correlation_id_ancestor(_correlation_id_ancestor)
|
||||
, dst_address_value(_dst_address_value)
|
||||
, src_address_value(_src_address_value)
|
||||
, stream_handle(_stream_handle)
|
||||
{}
|
||||
|
||||
uint64_t start_timestamp;
|
||||
uint64_t end_timestamp;
|
||||
|
||||
// Identification fields
|
||||
uint64_t thread_id;
|
||||
uint64_t dst_agent_id_handle;
|
||||
uint64_t src_agent_id_handle;
|
||||
|
||||
// Operation details
|
||||
int32_t kind;
|
||||
int32_t operation;
|
||||
uint64_t bytes;
|
||||
|
||||
// Correlation fields
|
||||
uint64_t correlation_id_internal;
|
||||
uint64_t correlation_id_ancestor;
|
||||
|
||||
// Address fields (version dependent)
|
||||
uint64_t dst_address_value;
|
||||
uint64_t src_address_value;
|
||||
|
||||
// Stream handle
|
||||
size_t stream_handle;
|
||||
size_t stream_handle;
|
||||
};
|
||||
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
struct memory_allocate_sample : storage_parsed_type_base
|
||||
template <>
|
||||
inline void
|
||||
serialize(uint8_t* buffer, const memory_copy_sample& item)
|
||||
{
|
||||
// Timing fields
|
||||
utility::store_value(buffer, item.start_timestamp, item.end_timestamp, item.thread_id,
|
||||
item.dst_agent_id_handle, item.src_agent_id_handle, item.kind,
|
||||
item.operation, item.bytes, item.correlation_id_internal,
|
||||
item.correlation_id_ancestor, item.dst_address_value,
|
||||
item.src_address_value, (uint64_t) item.stream_handle);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline memory_copy_sample
|
||||
deserialize(uint8_t*& buffer)
|
||||
{
|
||||
memory_copy_sample item;
|
||||
uint64_t stream_handle;
|
||||
utility::parse_value(buffer, item.start_timestamp, item.end_timestamp, item.thread_id,
|
||||
item.dst_agent_id_handle, item.src_agent_id_handle, item.kind,
|
||||
item.operation, item.bytes, item.correlation_id_internal,
|
||||
item.correlation_id_ancestor, item.dst_address_value,
|
||||
item.src_address_value, stream_handle);
|
||||
item.stream_handle = stream_handle;
|
||||
return item;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline size_t
|
||||
get_size(const memory_copy_sample& item)
|
||||
{
|
||||
return utility::get_size(
|
||||
item.start_timestamp, item.end_timestamp, item.thread_id,
|
||||
item.dst_agent_id_handle, item.src_agent_id_handle, item.kind, item.operation,
|
||||
item.bytes, item.correlation_id_internal, item.correlation_id_ancestor,
|
||||
item.dst_address_value, item.src_address_value, (uint64_t) item.stream_handle);
|
||||
}
|
||||
|
||||
struct memory_allocate_sample : cacheable_t
|
||||
{
|
||||
static constexpr type_identifier_t type_identifier = type_identifier_t::memory_alloc;
|
||||
|
||||
memory_allocate_sample() = default;
|
||||
memory_allocate_sample(uint64_t _start_timestamp, uint64_t _end_timestamp,
|
||||
uint64_t _thread_id, uint64_t _agent_id_handle, int32_t _kind,
|
||||
int32_t _operation, uint64_t _allocation_size,
|
||||
uint64_t _correlation_id_internal,
|
||||
uint64_t _correlation_id_ancestor, uint64_t _address_value,
|
||||
size_t _stream_handle)
|
||||
: start_timestamp(_start_timestamp)
|
||||
, end_timestamp(_end_timestamp)
|
||||
, thread_id(_thread_id)
|
||||
, agent_id_handle(_agent_id_handle)
|
||||
, kind(_kind)
|
||||
, operation(_operation)
|
||||
, allocation_size(_allocation_size)
|
||||
, correlation_id_internal(_correlation_id_internal)
|
||||
, correlation_id_ancestor(_correlation_id_ancestor)
|
||||
, address_value(_address_value)
|
||||
, stream_handle(_stream_handle)
|
||||
{}
|
||||
|
||||
uint64_t start_timestamp;
|
||||
uint64_t end_timestamp;
|
||||
|
||||
// Identification fields
|
||||
uint64_t thread_id;
|
||||
uint64_t agent_id_handle;
|
||||
|
||||
// Operation details
|
||||
int32_t kind;
|
||||
int32_t operation;
|
||||
uint64_t allocation_size;
|
||||
|
||||
// Correlation fields
|
||||
uint64_t correlation_id_internal;
|
||||
uint64_t correlation_id_ancestor;
|
||||
|
||||
// Address fields (version dependent)
|
||||
uint64_t address_value;
|
||||
|
||||
// Stream handle
|
||||
size_t stream_handle;
|
||||
size_t stream_handle;
|
||||
};
|
||||
#endif
|
||||
|
||||
struct region_sample : storage_parsed_type_base
|
||||
template <>
|
||||
inline void
|
||||
serialize(uint8_t* buffer, const memory_allocate_sample& item)
|
||||
{
|
||||
utility::store_value(buffer, item.start_timestamp, item.end_timestamp, item.thread_id,
|
||||
item.agent_id_handle, item.kind, item.operation,
|
||||
item.allocation_size, item.correlation_id_internal,
|
||||
item.correlation_id_ancestor, item.address_value,
|
||||
(uint64_t) item.stream_handle);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline memory_allocate_sample
|
||||
deserialize(uint8_t*& buffer)
|
||||
{
|
||||
memory_allocate_sample item;
|
||||
uint64_t stream_handle;
|
||||
utility::parse_value(buffer, item.start_timestamp, item.end_timestamp, item.thread_id,
|
||||
item.agent_id_handle, item.kind, item.operation,
|
||||
item.allocation_size, item.correlation_id_internal,
|
||||
item.correlation_id_ancestor, item.address_value, stream_handle);
|
||||
item.stream_handle = stream_handle;
|
||||
return item;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline size_t
|
||||
get_size(const memory_allocate_sample& item)
|
||||
{
|
||||
return utility::get_size(
|
||||
item.start_timestamp, item.end_timestamp, item.thread_id, item.agent_id_handle,
|
||||
item.kind, item.operation, item.allocation_size, item.correlation_id_internal,
|
||||
item.correlation_id_ancestor, item.address_value, (uint64_t) item.stream_handle);
|
||||
}
|
||||
|
||||
struct region_sample : cacheable_t
|
||||
{
|
||||
static constexpr type_identifier_t type_identifier = type_identifier_t::region;
|
||||
|
||||
region_sample() = default;
|
||||
region_sample(uint64_t _thread_id, std::string _name,
|
||||
uint64_t _correlation_id_internal, uint64_t _correlation_id_ancestor,
|
||||
@@ -147,20 +319,74 @@ struct region_sample : storage_parsed_type_base
|
||||
|
||||
uint64_t thread_id;
|
||||
std::string name;
|
||||
|
||||
uint64_t correlation_id_internal;
|
||||
uint64_t correlation_id_ancestor;
|
||||
|
||||
uint64_t start_timestamp;
|
||||
uint64_t end_timestamp;
|
||||
|
||||
uint64_t correlation_id_internal;
|
||||
uint64_t correlation_id_ancestor;
|
||||
uint64_t start_timestamp;
|
||||
uint64_t end_timestamp;
|
||||
std::string call_stack;
|
||||
std::string args_str;
|
||||
std::string category;
|
||||
};
|
||||
|
||||
struct in_time_sample : storage_parsed_type_base
|
||||
template <>
|
||||
inline void
|
||||
serialize(uint8_t* buffer, const region_sample& item)
|
||||
{
|
||||
utility::store_value(
|
||||
buffer, item.thread_id, std::string_view(item.name), item.correlation_id_internal,
|
||||
item.correlation_id_ancestor, item.start_timestamp, item.end_timestamp,
|
||||
std::string_view(item.call_stack), std::string_view(item.args_str),
|
||||
std::string_view(item.category));
|
||||
}
|
||||
|
||||
template <>
|
||||
inline region_sample
|
||||
deserialize(uint8_t*& buffer)
|
||||
{
|
||||
region_sample item;
|
||||
std::string_view name_view, call_stack_view, args_str_view, category_view;
|
||||
utility::parse_value(buffer, item.thread_id, name_view, item.correlation_id_internal,
|
||||
item.correlation_id_ancestor, item.start_timestamp,
|
||||
item.end_timestamp, call_stack_view, args_str_view,
|
||||
category_view);
|
||||
item.name = std::string(name_view);
|
||||
item.call_stack = std::string(call_stack_view);
|
||||
item.args_str = std::string(args_str_view);
|
||||
item.category = std::string(category_view);
|
||||
return item;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline size_t
|
||||
get_size(const region_sample& item)
|
||||
{
|
||||
return utility::get_size(
|
||||
item.thread_id, std::string_view(item.name), item.correlation_id_internal,
|
||||
item.correlation_id_ancestor, item.start_timestamp, item.end_timestamp,
|
||||
std::string_view(item.call_stack), std::string_view(item.args_str),
|
||||
std::string_view(item.category));
|
||||
}
|
||||
|
||||
struct in_time_sample : cacheable_t
|
||||
{
|
||||
static constexpr type_identifier_t type_identifier =
|
||||
type_identifier_t::in_time_sample;
|
||||
|
||||
in_time_sample() = default;
|
||||
in_time_sample(std::string _track_name, size_t _timestamp_ns,
|
||||
std::string _event_metadata, size_t _stack_id, size_t _parent_stack_id,
|
||||
size_t _correlation_id, std::string _call_stack,
|
||||
std::string _line_info)
|
||||
: track_name(std::move(_track_name))
|
||||
, timestamp_ns(_timestamp_ns)
|
||||
, event_metadata(std::move(_event_metadata))
|
||||
, stack_id(_stack_id)
|
||||
, parent_stack_id(_parent_stack_id)
|
||||
, correlation_id(_correlation_id)
|
||||
, call_stack(std::move(_call_stack))
|
||||
, line_info(std::move(_line_info))
|
||||
{}
|
||||
|
||||
std::string track_name;
|
||||
size_t timestamp_ns;
|
||||
std::string event_metadata;
|
||||
@@ -171,16 +397,149 @@ struct in_time_sample : storage_parsed_type_base
|
||||
std::string line_info;
|
||||
};
|
||||
|
||||
template <>
|
||||
inline void
|
||||
serialize(uint8_t* buffer, const in_time_sample& item)
|
||||
{
|
||||
utility::store_value(
|
||||
buffer, std::string_view(item.track_name), (uint64_t) item.timestamp_ns,
|
||||
std::string_view(item.event_metadata), (uint64_t) item.stack_id,
|
||||
(uint64_t) item.parent_stack_id, (uint64_t) item.correlation_id,
|
||||
std::string_view(item.call_stack), std::string_view(item.line_info));
|
||||
}
|
||||
|
||||
template <>
|
||||
inline in_time_sample
|
||||
deserialize(uint8_t*& buffer)
|
||||
{
|
||||
in_time_sample item;
|
||||
std::string_view track_name_view, event_metadata_view, call_stack_view,
|
||||
line_info_view;
|
||||
uint64_t timestamp_ns, stack_id, parent_stack_id, correlation_id;
|
||||
utility::parse_value(buffer, track_name_view, timestamp_ns, event_metadata_view,
|
||||
stack_id, parent_stack_id, correlation_id, call_stack_view,
|
||||
line_info_view);
|
||||
item.track_name = std::string(track_name_view);
|
||||
item.timestamp_ns = timestamp_ns;
|
||||
item.event_metadata = std::string(event_metadata_view);
|
||||
item.stack_id = stack_id;
|
||||
item.parent_stack_id = parent_stack_id;
|
||||
item.correlation_id = correlation_id;
|
||||
item.call_stack = std::string(call_stack_view);
|
||||
item.line_info = std::string(line_info_view);
|
||||
return item;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline size_t
|
||||
get_size(const in_time_sample& item)
|
||||
{
|
||||
return utility::get_size(
|
||||
std::string_view(item.track_name), (uint64_t) item.timestamp_ns,
|
||||
std::string_view(item.event_metadata), (uint64_t) item.stack_id,
|
||||
(uint64_t) item.parent_stack_id, (uint64_t) item.correlation_id,
|
||||
std::string_view(item.call_stack), std::string_view(item.line_info));
|
||||
}
|
||||
|
||||
struct pmc_event_with_sample : in_time_sample
|
||||
{
|
||||
static constexpr type_identifier_t type_identifier =
|
||||
type_identifier_t::pmc_event_with_sample;
|
||||
|
||||
pmc_event_with_sample() = default;
|
||||
pmc_event_with_sample(std::string _track_name, size_t _timestamp_ns,
|
||||
std::string _event_metadata, size_t _stack_id,
|
||||
size_t _parent_stack_id, size_t _correlation_id,
|
||||
std::string _call_stack, std::string _line_info,
|
||||
uint32_t _device_id, uint8_t _device_type,
|
||||
std::string _pmc_info_name, double _value)
|
||||
: in_time_sample(std::move(_track_name), _timestamp_ns, std::move(_event_metadata),
|
||||
_stack_id, _parent_stack_id, _correlation_id, std::move(_call_stack),
|
||||
std::move(_line_info))
|
||||
, device_id(_device_id)
|
||||
, device_type(_device_type)
|
||||
, pmc_info_name(std::move(_pmc_info_name))
|
||||
, value(_value)
|
||||
{}
|
||||
|
||||
uint32_t device_id;
|
||||
uint8_t device_type;
|
||||
std::string pmc_info_name;
|
||||
double value;
|
||||
};
|
||||
|
||||
struct amd_smi_sample : storage_parsed_type_base
|
||||
template <>
|
||||
inline void
|
||||
serialize(uint8_t* buffer, const pmc_event_with_sample& item)
|
||||
{
|
||||
utility::store_value(
|
||||
buffer, std::string_view(item.track_name), (uint64_t) item.timestamp_ns,
|
||||
std::string_view(item.event_metadata), (uint64_t) item.stack_id,
|
||||
(uint64_t) item.parent_stack_id, (uint64_t) item.correlation_id,
|
||||
std::string_view(item.call_stack), std::string_view(item.line_info),
|
||||
item.device_id, item.device_type, std::string_view(item.pmc_info_name),
|
||||
item.value);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline pmc_event_with_sample
|
||||
deserialize(uint8_t*& buffer)
|
||||
{
|
||||
pmc_event_with_sample item;
|
||||
std::string_view track_name_view, event_metadata_view, call_stack_view,
|
||||
line_info_view, pmc_info_name_view;
|
||||
uint64_t timestamp_ns, stack_id, parent_stack_id, correlation_id;
|
||||
utility::parse_value(buffer, track_name_view, timestamp_ns, event_metadata_view,
|
||||
stack_id, parent_stack_id, correlation_id, call_stack_view,
|
||||
line_info_view, item.device_id, item.device_type,
|
||||
pmc_info_name_view, item.value);
|
||||
item.track_name = std::string(track_name_view);
|
||||
item.timestamp_ns = timestamp_ns;
|
||||
item.event_metadata = std::string(event_metadata_view);
|
||||
item.stack_id = stack_id;
|
||||
item.parent_stack_id = parent_stack_id;
|
||||
item.correlation_id = correlation_id;
|
||||
item.call_stack = std::string(call_stack_view);
|
||||
item.line_info = std::string(line_info_view);
|
||||
item.pmc_info_name = std::string(pmc_info_name_view);
|
||||
return item;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline size_t
|
||||
get_size(const pmc_event_with_sample& item)
|
||||
{
|
||||
return utility::get_size(
|
||||
std::string_view(item.track_name), (uint64_t) item.timestamp_ns,
|
||||
std::string_view(item.event_metadata), (uint64_t) item.stack_id,
|
||||
(uint64_t) item.parent_stack_id, (uint64_t) item.correlation_id,
|
||||
std::string_view(item.call_stack), std::string_view(item.line_info),
|
||||
item.device_id, item.device_type, std::string_view(item.pmc_info_name),
|
||||
item.value);
|
||||
}
|
||||
|
||||
struct amd_smi_sample : cacheable_t
|
||||
{
|
||||
static constexpr type_identifier_t type_identifier =
|
||||
type_identifier_t::amd_smi_sample;
|
||||
|
||||
amd_smi_sample() = default;
|
||||
amd_smi_sample(uint64_t _settings, uint32_t _device_id, size_t _timestamp,
|
||||
uint32_t _gfx_activity, uint32_t _umc_activity, uint32_t _mm_activity,
|
||||
uint32_t _power, int64_t _temperature, size_t _mem_usage,
|
||||
std::vector<uint8_t> _gpu_activity)
|
||||
: settings(_settings)
|
||||
, device_id(_device_id)
|
||||
, timestamp(_timestamp)
|
||||
, gfx_activity(_gfx_activity)
|
||||
, umc_activity(_umc_activity)
|
||||
, mm_activity(_mm_activity)
|
||||
, power(_power)
|
||||
, temperature(_temperature)
|
||||
, mem_usage(_mem_usage)
|
||||
, gpu_activity(std::move(_gpu_activity))
|
||||
{}
|
||||
|
||||
enum class settings_positions : uint8_t
|
||||
{
|
||||
busy = 0,
|
||||
@@ -205,8 +564,61 @@ struct amd_smi_sample : storage_parsed_type_base
|
||||
std::vector<uint8_t> gpu_activity;
|
||||
};
|
||||
|
||||
struct cpu_freq_sample : storage_parsed_type_base
|
||||
template <>
|
||||
inline void
|
||||
serialize(uint8_t* buffer, const amd_smi_sample& item)
|
||||
{
|
||||
utility::store_value(buffer, item.settings, item.device_id, (uint64_t) item.timestamp,
|
||||
item.gfx_activity, item.umc_activity, item.mm_activity,
|
||||
item.power, item.temperature, (uint64_t) item.mem_usage,
|
||||
item.gpu_activity);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline amd_smi_sample
|
||||
deserialize(uint8_t*& buffer)
|
||||
{
|
||||
amd_smi_sample item;
|
||||
uint64_t timestamp, mem_usage;
|
||||
utility::parse_value(buffer, item.settings, item.device_id, timestamp,
|
||||
item.gfx_activity, item.umc_activity, item.mm_activity,
|
||||
item.power, item.temperature, mem_usage, item.gpu_activity);
|
||||
item.timestamp = timestamp;
|
||||
item.mem_usage = mem_usage;
|
||||
return item;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline size_t
|
||||
get_size(const amd_smi_sample& item)
|
||||
{
|
||||
return utility::get_size(item.settings, item.device_id, (uint64_t) item.timestamp,
|
||||
item.gfx_activity, item.umc_activity, item.mm_activity,
|
||||
item.power, item.temperature, (uint64_t) item.mem_usage,
|
||||
item.gpu_activity);
|
||||
}
|
||||
|
||||
struct cpu_freq_sample : cacheable_t
|
||||
{
|
||||
static constexpr type_identifier_t type_identifier =
|
||||
type_identifier_t::cpu_freq_sample;
|
||||
|
||||
cpu_freq_sample() = default;
|
||||
cpu_freq_sample(size_t _timestamp, int64_t _page_rss, int64_t _virt_mem_usage,
|
||||
int64_t _peak_rss, int64_t _context_switch_count,
|
||||
int64_t _page_faults, int64_t _user_mode_time,
|
||||
int64_t _kernel_mode_time, std::vector<uint8_t> _freqs)
|
||||
: timestamp(_timestamp)
|
||||
, page_rss(_page_rss)
|
||||
, virt_mem_usage(_virt_mem_usage)
|
||||
, peak_rss(_peak_rss)
|
||||
, context_switch_count(_context_switch_count)
|
||||
, page_faults(_page_faults)
|
||||
, user_mode_time(_user_mode_time)
|
||||
, kernel_mode_time(_kernel_mode_time)
|
||||
, freqs(std::move(_freqs))
|
||||
{}
|
||||
|
||||
size_t timestamp;
|
||||
int64_t page_rss;
|
||||
int64_t virt_mem_usage;
|
||||
@@ -218,8 +630,44 @@ struct cpu_freq_sample : storage_parsed_type_base
|
||||
std::vector<uint8_t> freqs;
|
||||
};
|
||||
|
||||
struct backtrace_region_sample : storage_parsed_type_base
|
||||
template <>
|
||||
inline void
|
||||
serialize(uint8_t* buffer, const cpu_freq_sample& item)
|
||||
{
|
||||
utility::store_value(buffer, (uint64_t) item.timestamp, item.page_rss,
|
||||
item.virt_mem_usage, item.peak_rss, item.context_switch_count,
|
||||
item.page_faults, item.user_mode_time, item.kernel_mode_time,
|
||||
item.freqs);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline cpu_freq_sample
|
||||
deserialize(uint8_t*& buffer)
|
||||
{
|
||||
cpu_freq_sample item;
|
||||
uint64_t timestamp;
|
||||
utility::parse_value(buffer, timestamp, item.page_rss, item.virt_mem_usage,
|
||||
item.peak_rss, item.context_switch_count, item.page_faults,
|
||||
item.user_mode_time, item.kernel_mode_time, item.freqs);
|
||||
item.timestamp = timestamp;
|
||||
return item;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline size_t
|
||||
get_size(const cpu_freq_sample& item)
|
||||
{
|
||||
return utility::get_size((uint64_t) item.timestamp, item.page_rss,
|
||||
item.virt_mem_usage, item.peak_rss,
|
||||
item.context_switch_count, item.page_faults,
|
||||
item.user_mode_time, item.kernel_mode_time, item.freqs);
|
||||
}
|
||||
|
||||
struct backtrace_region_sample : cacheable_t
|
||||
{
|
||||
static constexpr type_identifier_t type_identifier =
|
||||
type_identifier_t::backtrace_region_sample;
|
||||
|
||||
backtrace_region_sample() = default;
|
||||
backtrace_region_sample(uint32_t _type, uint64_t _thread_id, std::string _track_name,
|
||||
std::string _name, uint64_t _start_timestamp,
|
||||
@@ -242,30 +690,54 @@ struct backtrace_region_sample : storage_parsed_type_base
|
||||
uint64_t thread_id;
|
||||
std::string track_name;
|
||||
std::string name;
|
||||
|
||||
uint64_t start_timestamp;
|
||||
uint64_t end_timestamp;
|
||||
|
||||
uint64_t start_timestamp;
|
||||
uint64_t end_timestamp;
|
||||
std::string category;
|
||||
std::string call_stack;
|
||||
std::string line_info;
|
||||
std::string extdata;
|
||||
};
|
||||
|
||||
enum class entry_type : uint32_t
|
||||
template <>
|
||||
inline void
|
||||
serialize(uint8_t* buffer, const backtrace_region_sample& item)
|
||||
{
|
||||
in_time_sample = 0x0000,
|
||||
pmc_event_with_sample = 0x0001,
|
||||
region = 0x0002,
|
||||
kernel_dispatch = 0x0003,
|
||||
memory_copy = 0x0004,
|
||||
#if(ROCPROFSYS_USE_ROCM && ROCPROFILER_VERSION >= 600)
|
||||
memory_alloc = 0x0005,
|
||||
#endif
|
||||
amd_smi_sample = 0x0006,
|
||||
cpu_freq_sample = 0x0007,
|
||||
backtrace_region_sample = 0x0008,
|
||||
fragmented_space = 0xFFFF
|
||||
};
|
||||
utility::store_value(
|
||||
buffer, item.type, item.thread_id, std::string_view(item.track_name),
|
||||
std::string_view(item.name), item.start_timestamp, item.end_timestamp,
|
||||
std::string_view(item.category), std::string_view(item.call_stack),
|
||||
std::string_view(item.line_info), std::string_view(item.extdata));
|
||||
}
|
||||
|
||||
template <>
|
||||
inline backtrace_region_sample
|
||||
deserialize(uint8_t*& buffer)
|
||||
{
|
||||
backtrace_region_sample item;
|
||||
std::string_view track_name_view, name_view, category_view, call_stack_view,
|
||||
line_info_view, extdata_view;
|
||||
utility::parse_value(buffer, item.type, item.thread_id, track_name_view, name_view,
|
||||
item.start_timestamp, item.end_timestamp, category_view,
|
||||
call_stack_view, line_info_view, extdata_view);
|
||||
item.track_name = std::string(track_name_view);
|
||||
item.name = std::string(name_view);
|
||||
item.category = std::string(category_view);
|
||||
item.call_stack = std::string(call_stack_view);
|
||||
item.line_info = std::string(line_info_view);
|
||||
item.extdata = std::string(extdata_view);
|
||||
return item;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline size_t
|
||||
get_size(const backtrace_region_sample& item)
|
||||
{
|
||||
return utility::get_size(
|
||||
item.type, item.thread_id, std::string_view(item.track_name),
|
||||
std::string_view(item.name), item.start_timestamp, item.end_timestamp,
|
||||
std::string_view(item.category), std::string_view(item.call_stack),
|
||||
std::string_view(item.line_info), std::string_view(item.extdata));
|
||||
}
|
||||
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
|
||||
@@ -1,274 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "storage_parser.hpp"
|
||||
#include "debug.hpp"
|
||||
#include "trace_cache/sample_type.hpp"
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
|
||||
storage_parser::storage_parser(std::string _filename)
|
||||
: m_filename(std::move(_filename))
|
||||
{}
|
||||
|
||||
void
|
||||
storage_parser::register_type_callback(
|
||||
const entry_type& type,
|
||||
const std::function<void(const storage_parsed_type_base&)>& callback)
|
||||
{
|
||||
m_callbacks[type].push_back(callback);
|
||||
}
|
||||
|
||||
void
|
||||
storage_parser::register_on_finished_callback(
|
||||
std::unique_ptr<std::function<void()>> callback)
|
||||
{
|
||||
m_on_finished_callback = std::move(callback);
|
||||
}
|
||||
|
||||
void
|
||||
storage_parser::consume_storage()
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(0, "Consuming buffered storage with filename: %s\n",
|
||||
m_filename.c_str());
|
||||
|
||||
std::ifstream ifs(m_filename, std::ios::binary);
|
||||
if(!ifs)
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << "Error opening file for reading: " << m_filename << "\n";
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
|
||||
bool _parsing_needed = !m_callbacks.empty();
|
||||
|
||||
struct __attribute__((packed)) sample_header
|
||||
{
|
||||
entry_type type;
|
||||
size_t sample_size;
|
||||
};
|
||||
|
||||
sample_header header;
|
||||
|
||||
while(!ifs.eof() && _parsing_needed)
|
||||
{
|
||||
ifs.read(reinterpret_cast<char*>(&header), sizeof(header));
|
||||
|
||||
if(header.sample_size == 0 || ifs.eof())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> sample;
|
||||
sample.reserve(header.sample_size);
|
||||
ifs.read(reinterpret_cast<char*>(sample.data()), header.sample_size);
|
||||
|
||||
if(ifs.bad())
|
||||
{
|
||||
ROCPROFSYS_WARNING(1,
|
||||
"Bad read while consuming buffered storage. Filename: %s. "
|
||||
"Bytes read: %d\n",
|
||||
m_filename.c_str(), static_cast<int>(ifs.tellg()));
|
||||
continue;
|
||||
}
|
||||
|
||||
switch(header.type)
|
||||
{
|
||||
case entry_type::kernel_dispatch:
|
||||
{
|
||||
kernel_dispatch_sample _kernel_dispatch_sample;
|
||||
parse_data(sample.data(), _kernel_dispatch_sample.start_timestamp,
|
||||
_kernel_dispatch_sample.end_timestamp,
|
||||
_kernel_dispatch_sample.thread_id,
|
||||
_kernel_dispatch_sample.agent_id_handle,
|
||||
_kernel_dispatch_sample.kernel_id,
|
||||
_kernel_dispatch_sample.dispatch_id,
|
||||
_kernel_dispatch_sample.queue_id_handle,
|
||||
_kernel_dispatch_sample.correlation_id_internal,
|
||||
_kernel_dispatch_sample.correlation_id_ancestor,
|
||||
_kernel_dispatch_sample.private_segment_size,
|
||||
_kernel_dispatch_sample.group_segment_size,
|
||||
_kernel_dispatch_sample.workgroup_size_x,
|
||||
_kernel_dispatch_sample.workgroup_size_y,
|
||||
_kernel_dispatch_sample.workgroup_size_z,
|
||||
_kernel_dispatch_sample.grid_size_x,
|
||||
_kernel_dispatch_sample.grid_size_y,
|
||||
_kernel_dispatch_sample.grid_size_z,
|
||||
_kernel_dispatch_sample.stream_handle);
|
||||
|
||||
invoke_callbacks(header.type, _kernel_dispatch_sample);
|
||||
break;
|
||||
}
|
||||
case entry_type::memory_copy:
|
||||
{
|
||||
memory_copy_sample _memory_copy_sample;
|
||||
parse_data(
|
||||
sample.data(), _memory_copy_sample.start_timestamp,
|
||||
_memory_copy_sample.end_timestamp, _memory_copy_sample.thread_id,
|
||||
_memory_copy_sample.dst_agent_id_handle,
|
||||
_memory_copy_sample.src_agent_id_handle, _memory_copy_sample.kind,
|
||||
_memory_copy_sample.operation, _memory_copy_sample.bytes,
|
||||
_memory_copy_sample.correlation_id_internal,
|
||||
_memory_copy_sample.correlation_id_ancestor,
|
||||
_memory_copy_sample.dst_address_value,
|
||||
_memory_copy_sample.src_address_value,
|
||||
_memory_copy_sample.stream_handle);
|
||||
invoke_callbacks(header.type, _memory_copy_sample);
|
||||
break;
|
||||
}
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
case entry_type::memory_alloc:
|
||||
{
|
||||
memory_allocate_sample _memory_allocate_sample;
|
||||
parse_data(sample.data(), _memory_allocate_sample.start_timestamp,
|
||||
_memory_allocate_sample.end_timestamp,
|
||||
_memory_allocate_sample.thread_id,
|
||||
_memory_allocate_sample.agent_id_handle,
|
||||
_memory_allocate_sample.kind,
|
||||
_memory_allocate_sample.operation,
|
||||
_memory_allocate_sample.allocation_size,
|
||||
_memory_allocate_sample.correlation_id_internal,
|
||||
_memory_allocate_sample.correlation_id_ancestor,
|
||||
_memory_allocate_sample.address_value,
|
||||
_memory_allocate_sample.stream_handle);
|
||||
|
||||
invoke_callbacks(header.type, _memory_allocate_sample);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case entry_type::region:
|
||||
{
|
||||
region_sample _region_sample;
|
||||
parse_data(sample.data(), _region_sample.thread_id, _region_sample.name,
|
||||
_region_sample.correlation_id_internal,
|
||||
_region_sample.correlation_id_ancestor,
|
||||
_region_sample.start_timestamp, _region_sample.end_timestamp,
|
||||
_region_sample.call_stack, _region_sample.args_str,
|
||||
_region_sample.category);
|
||||
|
||||
invoke_callbacks(header.type, _region_sample);
|
||||
break;
|
||||
}
|
||||
case entry_type::in_time_sample:
|
||||
{
|
||||
in_time_sample _in_time_sample;
|
||||
parse_data(sample.data(), _in_time_sample.track_name,
|
||||
_in_time_sample.timestamp_ns, _in_time_sample.event_metadata,
|
||||
_in_time_sample.stack_id, _in_time_sample.parent_stack_id,
|
||||
_in_time_sample.correlation_id, _in_time_sample.call_stack,
|
||||
_in_time_sample.line_info);
|
||||
invoke_callbacks(header.type, _in_time_sample);
|
||||
break;
|
||||
}
|
||||
case entry_type::pmc_event_with_sample:
|
||||
{
|
||||
pmc_event_with_sample _pmc_event_with_sample;
|
||||
parse_data(
|
||||
sample.data(), _pmc_event_with_sample.track_name,
|
||||
_pmc_event_with_sample.timestamp_ns,
|
||||
_pmc_event_with_sample.event_metadata,
|
||||
_pmc_event_with_sample.stack_id,
|
||||
_pmc_event_with_sample.parent_stack_id,
|
||||
_pmc_event_with_sample.correlation_id,
|
||||
_pmc_event_with_sample.call_stack, _pmc_event_with_sample.line_info,
|
||||
_pmc_event_with_sample.device_id, _pmc_event_with_sample.device_type,
|
||||
_pmc_event_with_sample.pmc_info_name, _pmc_event_with_sample.value);
|
||||
invoke_callbacks(header.type, _pmc_event_with_sample);
|
||||
break;
|
||||
}
|
||||
case entry_type::amd_smi_sample:
|
||||
{
|
||||
amd_smi_sample _amd_smi_sample;
|
||||
parse_data(sample.data(), _amd_smi_sample.settings,
|
||||
_amd_smi_sample.device_id, _amd_smi_sample.timestamp,
|
||||
_amd_smi_sample.gfx_activity, _amd_smi_sample.umc_activity,
|
||||
_amd_smi_sample.mm_activity, _amd_smi_sample.power,
|
||||
_amd_smi_sample.temperature, _amd_smi_sample.mem_usage,
|
||||
_amd_smi_sample.gpu_activity);
|
||||
invoke_callbacks(header.type, _amd_smi_sample);
|
||||
break;
|
||||
}
|
||||
case entry_type::cpu_freq_sample:
|
||||
{
|
||||
cpu_freq_sample _cpu_freq_sample;
|
||||
parse_data(sample.data(), _cpu_freq_sample.timestamp,
|
||||
_cpu_freq_sample.page_rss, _cpu_freq_sample.virt_mem_usage,
|
||||
_cpu_freq_sample.peak_rss,
|
||||
_cpu_freq_sample.context_switch_count,
|
||||
_cpu_freq_sample.page_faults, _cpu_freq_sample.user_mode_time,
|
||||
_cpu_freq_sample.kernel_mode_time, _cpu_freq_sample.freqs);
|
||||
invoke_callbacks(header.type, _cpu_freq_sample);
|
||||
break;
|
||||
}
|
||||
case entry_type::backtrace_region_sample:
|
||||
{
|
||||
backtrace_region_sample _backtrace_region_sample;
|
||||
parse_data(
|
||||
sample.data(), _backtrace_region_sample.type,
|
||||
_backtrace_region_sample.thread_id,
|
||||
_backtrace_region_sample.track_name, _backtrace_region_sample.name,
|
||||
_backtrace_region_sample.start_timestamp,
|
||||
_backtrace_region_sample.end_timestamp,
|
||||
_backtrace_region_sample.category,
|
||||
_backtrace_region_sample.call_stack,
|
||||
_backtrace_region_sample.line_info, _backtrace_region_sample.extdata);
|
||||
invoke_callbacks(header.type, _backtrace_region_sample);
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
|
||||
ifs.close();
|
||||
|
||||
if(m_on_finished_callback != nullptr)
|
||||
{
|
||||
(*m_on_finished_callback)();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
storage_parser::invoke_callbacks(entry_type type, const storage_parsed_type_base& parsed)
|
||||
{
|
||||
auto _callback_list = m_callbacks.find(type);
|
||||
if(_callback_list == m_callbacks.end())
|
||||
{
|
||||
ROCPROFSYS_VERBOSE(1, "Callback not found for cache postprocessing\n");
|
||||
return;
|
||||
}
|
||||
|
||||
for(auto& cb : _callback_list->second)
|
||||
{
|
||||
cb(parsed);
|
||||
}
|
||||
}
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
@@ -22,73 +22,142 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "buffer_storage.hpp"
|
||||
#include "sample_type.hpp"
|
||||
#include "common/defines.h"
|
||||
#include "core/debug.hpp"
|
||||
#include "core/trace_cache/cacheable.hpp"
|
||||
#include "core/trace_cache/type_registry.hpp"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <rocprofiler-systems/categories.h>
|
||||
#include <stdint.h>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
using postprocessing_callback = std::function<void(const storage_parsed_type_base&)>;
|
||||
class cache_manager;
|
||||
template <typename TypeIdentifierEnum, typename... SupportedTypes>
|
||||
class storage_parser
|
||||
{
|
||||
public:
|
||||
void register_type_callback(const entry_type& type,
|
||||
const postprocessing_callback& callback);
|
||||
static_assert(type_traits::is_enum_class_v<TypeIdentifierEnum>,
|
||||
"TypeIdentifierEnum must be an enum class");
|
||||
|
||||
void consume_storage();
|
||||
void register_on_finished_callback(std::unique_ptr<std::function<void()>> callback);
|
||||
static_assert(sizeof...(SupportedTypes) != 0, "SupportedTypes must be non-empty");
|
||||
|
||||
public:
|
||||
storage_parser(std::string _filename)
|
||||
: m_filename(std::move(_filename))
|
||||
{}
|
||||
|
||||
template <typename TypeProcessing>
|
||||
void load(std::shared_ptr<TypeProcessing> _type_processing)
|
||||
{
|
||||
static_assert(
|
||||
type_traits::has_execute_processing<TypeProcessing, TypeIdentifierEnum,
|
||||
cacheable_t>::value,
|
||||
"TypeProcessing must have member function "
|
||||
"execute_sample_processing(TypeIdentifierEnum, const cacheable_t&)");
|
||||
|
||||
if(_type_processing == nullptr)
|
||||
{
|
||||
throw std::runtime_error("TypeProcessing is nullptr");
|
||||
}
|
||||
|
||||
ROCPROFSYS_DEBUG("Consuming buffered storage with filename: %s\n",
|
||||
m_filename.c_str());
|
||||
|
||||
std::ifstream ifs(m_filename, std::ios::binary);
|
||||
if(!ifs.good())
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << "Error opening file for reading: " << m_filename << "\n";
|
||||
throw std::runtime_error(ss.str());
|
||||
}
|
||||
|
||||
struct __attribute__((packed)) sample_header
|
||||
{
|
||||
TypeIdentifierEnum type;
|
||||
size_t sample_size;
|
||||
};
|
||||
|
||||
sample_header header;
|
||||
|
||||
std::vector<uint8_t> sample;
|
||||
sample.reserve(4096);
|
||||
size_t last_capacity = sample.capacity();
|
||||
|
||||
while(!ifs.eof())
|
||||
{
|
||||
if(!ifs.good())
|
||||
{
|
||||
ROCPROFSYS_WARNING(0,
|
||||
"Stream not in good state, stopping parse. File: %s\n",
|
||||
m_filename.c_str());
|
||||
break;
|
||||
}
|
||||
|
||||
ifs.read(reinterpret_cast<char*>(&header), sizeof(header));
|
||||
|
||||
if(header.sample_size == 0 || ifs.eof())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if(ROCPROFSYS_UNLIKELY(header.sample_size > last_capacity))
|
||||
{
|
||||
sample.reserve(header.sample_size);
|
||||
last_capacity = sample.capacity();
|
||||
}
|
||||
|
||||
sample.resize(header.sample_size);
|
||||
ifs.read(reinterpret_cast<char*>(sample.data()), header.sample_size);
|
||||
|
||||
if(ifs.fail())
|
||||
{
|
||||
ROCPROFSYS_WARNING(1,
|
||||
"Bad read while consuming buffered storage. Filename: "
|
||||
"%s Bytes read: %d\n",
|
||||
m_filename.c_str(), static_cast<int>(ifs.tellg()));
|
||||
continue;
|
||||
}
|
||||
|
||||
if(header.type == TypeIdentifierEnum::fragmented_space)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
auto* data = sample.data();
|
||||
|
||||
auto sample_value = m_registry.get_type(header.type, data);
|
||||
if(sample_value.has_value())
|
||||
{
|
||||
_type_processing->execute_sample_processing(
|
||||
header.type, std::visit(
|
||||
[](auto& arg) -> cacheable_t& {
|
||||
return static_cast<cacheable_t&>(arg);
|
||||
},
|
||||
sample_value.value()));
|
||||
}
|
||||
else
|
||||
{
|
||||
ROCPROFSYS_DEBUG("Unsupported type detected. Skipping current sample.\n");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
ifs.close();
|
||||
ROCPROFSYS_DEBUG("File parsing finished. Removing %s from file system.\n",
|
||||
m_filename.c_str());
|
||||
std::remove(m_filename.c_str());
|
||||
}
|
||||
|
||||
private:
|
||||
friend class cache_manager;
|
||||
storage_parser(std::string _filename);
|
||||
|
||||
template <typename T>
|
||||
static void process_arg(const uint8_t*& data_pos, T& arg)
|
||||
{
|
||||
if constexpr(std::is_same_v<T, std::string>)
|
||||
{
|
||||
arg = std::string((const char*) data_pos);
|
||||
data_pos += arg.size() + 1;
|
||||
}
|
||||
else if constexpr(std::is_same_v<T, std::vector<uint8_t>>)
|
||||
{
|
||||
size_t vector_size = *reinterpret_cast<const size_t*>(data_pos);
|
||||
data_pos += sizeof(size_t);
|
||||
arg.reserve(vector_size);
|
||||
std::copy_n(data_pos, vector_size, std::back_inserter(arg));
|
||||
data_pos += vector_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
arg = *reinterpret_cast<const T*>(data_pos);
|
||||
data_pos += sizeof(T);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
static void parse_data(const uint8_t* data_pos, Args&... args)
|
||||
{
|
||||
(process_arg(data_pos, args), ...);
|
||||
}
|
||||
|
||||
void invoke_callbacks(entry_type type, const storage_parsed_type_base& parsed);
|
||||
|
||||
std::string m_filename;
|
||||
std::map<entry_type, std::vector<postprocessing_callback>> m_callbacks;
|
||||
std::unique_ptr<std::function<void()>> m_on_finished_callback{ nullptr };
|
||||
std::string m_filename;
|
||||
type_registry<TypeIdentifierEnum, SupportedTypes...> m_registry;
|
||||
};
|
||||
|
||||
} // namespace trace_cache
|
||||
|
||||
+39
-23
@@ -21,37 +21,53 @@
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
#include "library/runtime.hpp"
|
||||
#include "sample_type.hpp"
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <timemory/units.hpp>
|
||||
#include <unistd.h>
|
||||
#include "core/trace_cache/cache_type_traits.hpp"
|
||||
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <optional>
|
||||
|
||||
namespace rocprofsys
|
||||
{
|
||||
namespace trace_cache
|
||||
{
|
||||
constexpr size_t buffer_size = 100 * tim::units::megabyte;
|
||||
constexpr size_t flush_threshold = 80 * tim::units::megabyte;
|
||||
|
||||
const auto tmp_directory = std::string{ "/tmp/" };
|
||||
template <typename TypeIdentifierEnum, typename... SupportedTypes>
|
||||
class type_registry
|
||||
{
|
||||
static_assert(type_traits::is_enum_class_v<TypeIdentifierEnum>,
|
||||
"TypeIdentifierEnum must be an enum class");
|
||||
|
||||
const auto get_buffered_storage_filename = [](const int& ppid, const int& pid) {
|
||||
return std::string{ tmp_directory + "buffered_storage_" + std::to_string(ppid) + "_" +
|
||||
std::to_string(pid) + ".bin" };
|
||||
public:
|
||||
using variant_t = typename std::variant<SupportedTypes...>;
|
||||
|
||||
type_registry() { (register_type<SupportedTypes>(), ...); }
|
||||
|
||||
std::optional<variant_t> get_type(TypeIdentifierEnum id, uint8_t*& data)
|
||||
{
|
||||
auto it = deserializers.find(id);
|
||||
if(it != deserializers.end())
|
||||
{
|
||||
return it->second(data);
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
private:
|
||||
std::map<TypeIdentifierEnum, std::function<variant_t(uint8_t*&)>> deserializers;
|
||||
|
||||
template <typename T>
|
||||
inline void register_type()
|
||||
{
|
||||
static_assert(type_traits::has_type_identifier<T, TypeIdentifierEnum>::value,
|
||||
"Type must have type_identifier");
|
||||
static_assert(type_traits::has_deserialize<T>::value,
|
||||
"Type must have deserialize function");
|
||||
deserializers[T::type_identifier] = [](uint8_t*& data) -> variant_t {
|
||||
return deserialize<T>(data);
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
const auto get_metadata_filepath = [](const int& ppid, const int& pid) {
|
||||
return std::string{ tmp_directory + "metadata_" + std::to_string(ppid) + "_" +
|
||||
std::to_string(pid) + ".json" };
|
||||
};
|
||||
|
||||
constexpr size_t minimal_fragmented_memory_size = sizeof(entry_type) + sizeof(size_t);
|
||||
using buffer_array_t = std::array<uint8_t, buffer_size>;
|
||||
|
||||
constexpr auto ABSOLUTE = "ABS";
|
||||
constexpr auto PERCENTAGE = "%";
|
||||
|
||||
} // namespace trace_cache
|
||||
} // namespace rocprofsys
|
||||
@@ -45,7 +45,7 @@
|
||||
#include "core/rocpd/data_processor.hpp"
|
||||
#include "core/timemory.hpp"
|
||||
#include "core/trace_cache/cache_manager.hpp"
|
||||
#include "core/trace_cache/cache_utility.hpp"
|
||||
#include "core/trace_cache/cacheable.hpp"
|
||||
#include "core/trace_cache/metadata_registry.hpp"
|
||||
#include "core/utility.hpp"
|
||||
#include "library/causal/data.hpp"
|
||||
@@ -578,7 +578,7 @@ rocprofsys_init_tooling_hidden(void)
|
||||
ROCPROFSYS_DEBUG_F("State: %s -> State::Active\n",
|
||||
std::to_string(get_state()).c_str());
|
||||
|
||||
trace_cache::get_buffer_storage().start_flushing_thread(getpid());
|
||||
trace_cache::get_buffer_storage().start(getpid());
|
||||
set_state(State::Active); // set to active as very last operation
|
||||
} };
|
||||
|
||||
@@ -798,7 +798,7 @@ rocprofsys_finalize_hidden(void)
|
||||
const auto _agents = get_agent_manager_instance().get_agents();
|
||||
_manager.shutdown();
|
||||
const auto metadata_filepath =
|
||||
trace_cache::get_metadata_filepath(get_root_process_id(), getpid());
|
||||
trace_cache::utility::get_metadata_filepath(get_root_process_id(), getpid());
|
||||
_manager.get_metadata_registry().save_to_file(metadata_filepath, _agents);
|
||||
|
||||
std::quick_exit(EXIT_SUCCESS);
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
|
||||
#include "core/agent.hpp"
|
||||
#include "core/trace_cache/cache_manager.hpp"
|
||||
#include "core/trace_cache/cache_utility.hpp"
|
||||
#include "core/trace_cache/cacheable.hpp"
|
||||
#include "core/trace_cache/sample_type.hpp"
|
||||
#include <amd_smi/amdsmi.h>
|
||||
#include <cstdint>
|
||||
@@ -693,12 +693,11 @@ data::sample(uint32_t _device_id)
|
||||
// Store samples if basic metrics are enabled OR if there's advanced metric data
|
||||
if(_basic_metrics_enabled || has_data)
|
||||
{
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::amd_smi_sample, serialize_settings(m_dev_id),
|
||||
_device_id, _timestamp, m_busy_perc.gfx_activity,
|
||||
m_busy_perc.umc_activity, m_busy_perc.mm_activity,
|
||||
m_power.current_socket_power, m_temp, m_mem_usage,
|
||||
serialize_gpu_metrics(m_dev_id, metrics, capabilities));
|
||||
trace_cache::get_buffer_storage().store(trace_cache::amd_smi_sample{
|
||||
serialize_settings(m_dev_id), _device_id, _timestamp,
|
||||
m_busy_perc.gfx_activity, m_busy_perc.umc_activity,
|
||||
m_busy_perc.mm_activity, m_power.current_socket_power, m_temp,
|
||||
m_mem_usage, serialize_gpu_metrics(m_dev_id, metrics, capabilities) });
|
||||
|
||||
if(has_data) m_gpu_metrics.push_back(metrics);
|
||||
}
|
||||
|
||||
+5
-6
@@ -27,7 +27,7 @@
|
||||
#include "core/debug.hpp"
|
||||
#include "core/perfetto.hpp"
|
||||
#include "core/trace_cache/cache_manager.hpp"
|
||||
#include "core/trace_cache/cache_utility.hpp"
|
||||
#include "core/trace_cache/cacheable.hpp"
|
||||
#include "core/trace_cache/metadata_registry.hpp"
|
||||
#include "library/components/ensure_storage.hpp"
|
||||
#include "library/ptl.hpp"
|
||||
@@ -261,11 +261,10 @@ cache_backtrace_metrics_events(const uint32_t device_id, uint64_t timestamp_ns,
|
||||
const auto* line_info = "";
|
||||
|
||||
auto insert_event_and_sample = [&](const char* _track_name, double _value) {
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::pmc_event_with_sample, _track_name, timestamp_ns,
|
||||
event_metadata, stack_id, parent_stack_id, correlation_id, call_stack,
|
||||
line_info, device_id, static_cast<uint8_t>(agent_type::CPU), _track_name,
|
||||
_value);
|
||||
trace_cache::get_buffer_storage().store(trace_cache::pmc_event_with_sample{
|
||||
_track_name, timestamp_ns, event_metadata, stack_id, parent_stack_id,
|
||||
correlation_id, call_stack, line_info, device_id,
|
||||
static_cast<uint8_t>(agent_type::CPU), _track_name, _value });
|
||||
};
|
||||
|
||||
if constexpr(std::is_same_v<Category, category::thread_hardware_counter>)
|
||||
|
||||
+4
-3
@@ -27,6 +27,7 @@
|
||||
#include "core/state.hpp"
|
||||
#include "core/timemory.hpp"
|
||||
#include "core/trace_cache/cache_manager.hpp"
|
||||
#include "core/trace_cache/sample_type.hpp"
|
||||
#include "library/causal/data.hpp"
|
||||
#include "library/runtime.hpp"
|
||||
#include "library/thread_info.hpp"
|
||||
@@ -55,9 +56,9 @@ cache_region(uint64_t thread_id, const std::string& name, uint64_t start_ts,
|
||||
constexpr const char* CALLSTACK = "";
|
||||
constexpr const char* ARGUMENTS = "";
|
||||
rocprofsys::trace_cache::get_buffer_storage().store(
|
||||
rocprofsys::trace_cache::entry_type::region, thread_id, name.c_str(),
|
||||
NO_CORRELATION_ID, NO_CORRELATION_ID, start_ts, end_ts, CALLSTACK, ARGUMENTS,
|
||||
category.c_str());
|
||||
rocprofsys::trace_cache::region_sample{
|
||||
thread_id, name.c_str(), NO_CORRELATION_ID, NO_CORRELATION_ID, start_ts,
|
||||
end_ts, CALLSTACK, ARGUMENTS, category.c_str() });
|
||||
}
|
||||
|
||||
struct entry_key
|
||||
|
||||
@@ -151,12 +151,11 @@ cache_comm_data_events(const uint32_t device_id, int bytes)
|
||||
const std::string call_stack = "{}";
|
||||
const std::string line_info = "{}";
|
||||
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::pmc_event_with_sample, track_name.c_str(), timestamp_ns,
|
||||
event_metadata.c_str(), stack_id, parent_stack_id, correlation_id,
|
||||
call_stack.c_str(), line_info.c_str(), device_id,
|
||||
trace_cache::get_buffer_storage().store(trace_cache::pmc_event_with_sample{
|
||||
track_name.c_str(), timestamp_ns, event_metadata.c_str(), stack_id,
|
||||
parent_stack_id, correlation_id, call_stack.c_str(), line_info.c_str(), device_id,
|
||||
static_cast<uint8_t>(agent_type::CPU), track_name.c_str(),
|
||||
static_cast<double>(value));
|
||||
static_cast<double>(value) });
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -260,14 +260,13 @@ sample()
|
||||
auto _freqs = component::cpu_freq{}.sample();
|
||||
|
||||
// user and kernel mode times are in microseconds
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::cpu_freq_sample, _timestamp, tim::get_page_rss(),
|
||||
tim::get_virt_mem(), _rcache.get_peak_rss(),
|
||||
trace_cache::get_buffer_storage().store(trace_cache::cpu_freq_sample{
|
||||
_timestamp, tim::get_page_rss(), tim::get_virt_mem(), _rcache.get_peak_rss(),
|
||||
_rcache.get_num_priority_context_switch() +
|
||||
_rcache.get_num_voluntary_context_switch(),
|
||||
_rcache.get_num_major_page_faults() + _rcache.get_num_minor_page_faults(),
|
||||
_rcache.get_user_mode_time() * 1000, _rcache.get_kernel_mode_time() * 1000,
|
||||
serialize_freqs(_freqs));
|
||||
serialize_freqs(_freqs) });
|
||||
|
||||
data.emplace_back(
|
||||
_timestamp, tim::get_page_rss(), tim::get_virt_mem(), _rcache.get_peak_rss(),
|
||||
|
||||
@@ -191,10 +191,10 @@ cache_kokkos_event(const char* name, const char* event_type, const char* target,
|
||||
const char* line_info = "{}";
|
||||
|
||||
rocprofsys::trace_cache::get_buffer_storage().store(
|
||||
rocprofsys::trace_cache::entry_type::in_time_sample,
|
||||
rocprofsys::trait::name<category::kokkos>::value, timestamp_ns,
|
||||
event_metadata.dump().c_str(), stack_id, parent_stack_id, correlation_id,
|
||||
call_stack, line_info);
|
||||
rocprofsys::trace_cache::in_time_sample{
|
||||
rocprofsys::trait::name<category::kokkos>::value, timestamp_ns,
|
||||
event_metadata.dump().c_str(), stack_id, parent_stack_id, correlation_id,
|
||||
call_stack, line_info });
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -526,7 +526,6 @@ get_mem_alloc_address(
|
||||
}
|
||||
#endif
|
||||
|
||||
// clang-format off
|
||||
void
|
||||
cache_region(const rocprofiler_callback_tracing_record_t* record,
|
||||
const rocprofiler_timestamp_t start_timestamp,
|
||||
@@ -538,92 +537,62 @@ cache_region(const rocprofiler_callback_tracing_record_t* record,
|
||||
trace_cache::get_metadata_registry().get_callback_tracing_info();
|
||||
auto _name = std::string{ callback_tracing_info.at(record->kind, record->operation) };
|
||||
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::region,
|
||||
record->thread_id,
|
||||
_name.c_str(),
|
||||
record->correlation_id.internal,
|
||||
get_parent_stack_id(record->correlation_id),
|
||||
start_timestamp,
|
||||
end_timestamp,
|
||||
call_stack.c_str(),
|
||||
args_str.c_str(),
|
||||
category.c_str());
|
||||
trace_cache::get_buffer_storage().store(trace_cache::region_sample{
|
||||
record->thread_id, _name.c_str(), record->correlation_id.internal,
|
||||
get_parent_stack_id(record->correlation_id), start_timestamp, end_timestamp,
|
||||
call_stack.c_str(), args_str.c_str(), category.c_str() });
|
||||
}
|
||||
|
||||
void
|
||||
cache_kernel_dispatch(rocprofiler_buffer_tracing_kernel_dispatch_record_t* record, uint64_t stream_handle)
|
||||
cache_kernel_dispatch(rocprofiler_buffer_tracing_kernel_dispatch_record_t* record,
|
||||
uint64_t stream_handle)
|
||||
{
|
||||
auto queue_handle = record->dispatch_info.queue_id.handle;
|
||||
auto queue_handle = record->dispatch_info.queue_id.handle;
|
||||
|
||||
trace_cache::get_metadata_registry().add_queue(queue_handle);
|
||||
trace_cache::get_metadata_registry().add_stream(stream_handle);
|
||||
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::kernel_dispatch,
|
||||
record->start_timestamp,
|
||||
record->end_timestamp,
|
||||
record->thread_id,
|
||||
record->dispatch_info.agent_id.handle,
|
||||
record->dispatch_info.kernel_id,
|
||||
record->dispatch_info.dispatch_id,
|
||||
record->dispatch_info.queue_id.handle,
|
||||
record->correlation_id.internal,
|
||||
get_parent_stack_id(record->correlation_id),
|
||||
trace_cache::get_buffer_storage().store(trace_cache::kernel_dispatch_sample{
|
||||
record->start_timestamp, record->end_timestamp, record->thread_id,
|
||||
record->dispatch_info.agent_id.handle, record->dispatch_info.kernel_id,
|
||||
record->dispatch_info.dispatch_id, record->dispatch_info.queue_id.handle,
|
||||
record->correlation_id.internal, get_parent_stack_id(record->correlation_id),
|
||||
record->dispatch_info.private_segment_size,
|
||||
record->dispatch_info.group_segment_size,
|
||||
record->dispatch_info.workgroup_size.x,
|
||||
record->dispatch_info.workgroup_size.y,
|
||||
record->dispatch_info.workgroup_size.z,
|
||||
record->dispatch_info.grid_size.x,
|
||||
record->dispatch_info.grid_size.y,
|
||||
record->dispatch_info.grid_size.z,
|
||||
stream_handle);
|
||||
|
||||
record->dispatch_info.group_segment_size, record->dispatch_info.workgroup_size.x,
|
||||
record->dispatch_info.workgroup_size.y, record->dispatch_info.workgroup_size.z,
|
||||
record->dispatch_info.grid_size.x, record->dispatch_info.grid_size.y,
|
||||
record->dispatch_info.grid_size.z, stream_handle });
|
||||
}
|
||||
|
||||
void
|
||||
cache_memory_copy(rocprofiler_buffer_tracing_memory_copy_record_t* record, uint64_t stream_handle)
|
||||
cache_memory_copy(rocprofiler_buffer_tracing_memory_copy_record_t* record,
|
||||
uint64_t stream_handle)
|
||||
{
|
||||
trace_cache::get_metadata_registry().add_stream(stream_handle);
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::memory_copy,
|
||||
record->start_timestamp,
|
||||
record->end_timestamp,
|
||||
record->thread_id,
|
||||
record->dst_agent_id.handle,
|
||||
record->src_agent_id.handle,
|
||||
static_cast<int32_t>(record->kind),
|
||||
static_cast<int32_t>(record->operation),
|
||||
record->bytes,
|
||||
record->correlation_id.internal,
|
||||
get_parent_stack_id(record->correlation_id),
|
||||
get_mem_copy_dst_address(*record),
|
||||
get_mem_copy_src_address(*record),
|
||||
stream_handle);
|
||||
trace_cache::get_buffer_storage().store(trace_cache::memory_copy_sample{
|
||||
|
||||
record->start_timestamp, record->end_timestamp, record->thread_id,
|
||||
record->dst_agent_id.handle, record->src_agent_id.handle,
|
||||
static_cast<int32_t>(record->kind), static_cast<int32_t>(record->operation),
|
||||
record->bytes, record->correlation_id.internal,
|
||||
get_parent_stack_id(record->correlation_id), get_mem_copy_dst_address(*record),
|
||||
get_mem_copy_src_address(*record), stream_handle });
|
||||
}
|
||||
|
||||
#if (ROCPROFILER_VERSION >= 600)
|
||||
#if(ROCPROFILER_VERSION >= 600)
|
||||
void
|
||||
cache_memory_allocation(rocprofiler_buffer_tracing_memory_allocation_record_t* record, uint64_t stream_handle)
|
||||
cache_memory_allocation(rocprofiler_buffer_tracing_memory_allocation_record_t* record,
|
||||
uint64_t stream_handle)
|
||||
{
|
||||
trace_cache::get_metadata_registry().add_stream(stream_handle);
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::memory_alloc,
|
||||
record->start_timestamp,
|
||||
record->end_timestamp,
|
||||
record->thread_id,
|
||||
record->agent_id.handle,
|
||||
static_cast<int32_t>(record->kind),
|
||||
static_cast<int32_t>(record->operation),
|
||||
record->allocation_size,
|
||||
record->correlation_id.internal,
|
||||
get_parent_stack_id(record->correlation_id),
|
||||
get_mem_alloc_address(*record),
|
||||
stream_handle);
|
||||
trace_cache::get_buffer_storage().store(trace_cache::memory_allocate_sample{
|
||||
record->start_timestamp, record->end_timestamp, record->thread_id,
|
||||
record->agent_id.handle, static_cast<int32_t>(record->kind),
|
||||
static_cast<int32_t>(record->operation), record->allocation_size,
|
||||
record->correlation_id.internal, get_parent_stack_id(record->correlation_id),
|
||||
get_mem_alloc_address(*record), stream_handle });
|
||||
}
|
||||
#endif
|
||||
// clang-format on
|
||||
|
||||
std::string
|
||||
get_args_string(const function_args_t& args)
|
||||
|
||||
+4
-5
@@ -120,12 +120,11 @@ counter_event::operator()(const client_data* tool_data, ::perfetto::CounterTrack
|
||||
|
||||
auto agent = get_agent_manager_instance().get_agent_by_handle(agent_handle);
|
||||
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::pmc_event_with_sample, track_name.c_str(),
|
||||
_timing.start, event_metadata.c_str(), stack_id, parent_stack_id,
|
||||
correlation_id, call_stack.c_str(), line_info.c_str(),
|
||||
trace_cache::get_buffer_storage().store(trace_cache::pmc_event_with_sample{
|
||||
track_name.c_str(), _timing.start, event_metadata.c_str(), stack_id,
|
||||
parent_stack_id, correlation_id, call_stack.c_str(), line_info.c_str(),
|
||||
static_cast<uint32_t>(agent.device_id), static_cast<uint8_t>(agent.type),
|
||||
track_name.c_str(), value);
|
||||
track_name.c_str(), static_cast<double>(value) });
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -315,13 +315,12 @@ cache_sampling_data(int64_t _tid, const std::vector<timer_sampling_data>& _timer
|
||||
auto _call_stack = generate_call_stack_json(iitr);
|
||||
auto _line_info = generate_line_info_json(iitr);
|
||||
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::backtrace_region_sample,
|
||||
trace_cache::get_buffer_storage().store(trace_cache::backtrace_region_sample{
|
||||
static_cast<uint32_t>(ROCPROFSYS_CATEGORY_TIMER_SAMPLING),
|
||||
static_cast<uint64_t>(_thread_info->index_data->system_value),
|
||||
_track_name.c_str(), _name.c_str(), itr.m_beg, itr.m_end,
|
||||
trait::name<category::timer_sampling>::value, _call_stack.c_str(),
|
||||
_line_info.c_str(), "{}");
|
||||
_line_info.c_str(), "{}" });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -348,13 +347,12 @@ cache_sampling_data(int64_t _tid, const std::vector<timer_sampling_data>& _timer
|
||||
auto _call_stack = generate_call_stack_json(iitr);
|
||||
auto _line_info = generate_line_info_json(iitr);
|
||||
|
||||
trace_cache::get_buffer_storage().store(
|
||||
trace_cache::entry_type::backtrace_region_sample,
|
||||
trace_cache::get_buffer_storage().store(trace_cache::backtrace_region_sample{
|
||||
static_cast<uint32_t>(ROCPROFSYS_CATEGORY_OVERFLOW_SAMPLING),
|
||||
static_cast<uint64_t>(_thread_info->index_data->system_value),
|
||||
_track_name.c_str(), _name.c_str(), itr.m_beg, itr.m_end,
|
||||
trait::name<category::overflow_sampling>::value, _call_stack.c_str(),
|
||||
_line_info.c_str(), "{}");
|
||||
_line_info.c_str(), "{}" });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Referens i nytt ärende
Block a user