Correlation ID Retirement + misc (#527)
* Correlation ID Retirement
- include/rocprofiler-sdk/buffer_tracing.h
- add rocprofiler_buffer_tracing_correlation_id_retirement_record_t
- include/rocprofiler-sdk/fwd.h
- ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT
- lib/rocprofiler-sdk/buffer_tracing.cpp
- kind string for correlation id retirement
- lib/rocprofiler-sdk/buffer.hpp
- emplace returns bool
- lib/rocprofiler-sdk/registration.cpp
- pass lib_instance to copy_table functions
- lib/rocprofiler-sdk/context/context.*
- update correlation_id struct
- make ref_count private
- {get,add,sub}_ref_count() functions
- sub_ref_count() performs correlation id retirement
- use stack for "latest" thread-local correlation id
- lib/rocprofiler-sdk/hip/hip.*
- migrate to new {get,add,sub}_ref_count() for correlation ids
- return in iterate_args
- handle table instance in copy_table
- lib/rocprofiler-sdk/hsa/hsa.*
- migrate to new {get,add,sub}_ref_count() for correlation ids
- return in iterate_args
- handle table instance in copy_table
- lib/rocprofiler-sdk/marker/marker.*
- migrate to new {get,add,sub}_ref_count() for correlation ids
- return in iterate_args
- handle table instance in copy_table
- lib/rocprofiler-sdk/hsa/async_copy.cpp
- migrate to new {get,add,sub}_ref_count() for correlation ids
- handle table instance in async_copy_init / async_copy_save
- lib/rocprofiler-sdk/hsa/queue.cpp
- migrate to new {get,add,sub}_ref_count() for correlation ids
- tweak to external correlation id mapping in WriteInterceptor
- tests/async-copy-tracing/validate.py
- check retired_correlation_ids
- tests/common/serialization.hpp
- support rocprofiler_buffer_tracing_correlation_id_retirement_record_t
- tests/kernel-tracing/validate.py
- check retired_correlation_ids
- tests/common/CMakeLists.txt
- perfetto external project
- tests/common/perfetto.hpp
- perfetto categories + aliases
- add_perfetto_annotation
- metaprogramming helpers
- tests/tools/CMakeLists.txt
- link to tests-perfetto
- tests/tools/json-tool.cpp
- demangling functions
- serialization of marker API callback args
- reduce parallel bottleneck in tool_tracing_callback
- support correlation id retirement
- Multiple threads for buffers
- Support ROCPROFILER_TOOL_CONTEXTS_EXCLUDE env variable
- write_perfetto() function
* Update tests/rocprofv3/tracing/validate.py
- tweak test_hsa_api_trace
* Update PTL submodule
- fixes for data race during destruction of task
* Update lib/rocprofiler-sdk/buffer.*
- unique_buffer_vec_t uses std::unique_ptr instead of allocator::unique_static_ptr_t
* Reduce timeouts in counter collection samples [skip ci]
* Update tests/tools/json-tool.cpp
- tweak demangle(string_view, int*) -> demangle(string_view, int&)
* Update lib/rocprofiler-sdk/hsa/async_copy.cpp
- move sub_ref_count() to later in async_copy_handler to delay retirement slightly more
[ROCm/rocprofiler-sdk commit: 875f53b608]
Este commit está contenido en:
cometido por
GitHub
padre
a651f1e215
commit
a360de4550
+1
-1
Submodule projects/rocprofiler-sdk/external/ptl updated: 7c389294d7...12ca26ac2b
@@ -46,7 +46,7 @@ set_tests_properties(
|
||||
counter-collection-buffer
|
||||
PROPERTIES
|
||||
TIMEOUT
|
||||
600
|
||||
120
|
||||
LABELS
|
||||
"samples"
|
||||
ENVIRONMENT
|
||||
@@ -74,7 +74,7 @@ set_tests_properties(
|
||||
counter-collection-callback
|
||||
PROPERTIES
|
||||
TIMEOUT
|
||||
600
|
||||
120
|
||||
LABELS
|
||||
"samples"
|
||||
ENVIRONMENT
|
||||
@@ -103,7 +103,7 @@ set_tests_properties(
|
||||
counter-collection-print-functional-counters
|
||||
PROPERTIES
|
||||
TIMEOUT
|
||||
300
|
||||
120
|
||||
LABELS
|
||||
"samples"
|
||||
ENVIRONMENT
|
||||
|
||||
@@ -164,6 +164,25 @@ typedef struct
|
||||
// Not Sure What is the info needed here?
|
||||
} rocprofiler_buffer_tracing_scratch_memory_record_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler Buffer Correlation ID Retirement Tracer Record.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint64_t size; ///< size of this struct
|
||||
rocprofiler_buffer_tracing_kind_t kind;
|
||||
rocprofiler_timestamp_t timestamp;
|
||||
uint64_t internal_correlation_id;
|
||||
|
||||
/// @var kind
|
||||
/// @brief ::ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT
|
||||
/// @var timestamp
|
||||
/// @brief Timestamp (in nanosec) of when rocprofiler detected the correlation ID could be
|
||||
/// retired
|
||||
/// @var internal_correlation_id
|
||||
/// @brief Only internal correlation ID is provided
|
||||
} rocprofiler_buffer_tracing_correlation_id_retirement_record_t;
|
||||
|
||||
/**
|
||||
* @brief Callback function for mapping @ref rocprofiler_buffer_tracing_kind_t ids to
|
||||
* string names. @see rocprofiler_iterate_buffer_trace_kind_names.
|
||||
|
||||
@@ -171,6 +171,7 @@ typedef enum // NOLINT(performance-enum-size)
|
||||
ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, ///< Buffer kernel dispatch info
|
||||
ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, ///< Buffer page migration info
|
||||
ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY, ///< Buffer scratch memory reclaimation info
|
||||
ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT, ///< Correlation ID in no longer in use
|
||||
ROCPROFILER_BUFFER_TRACING_LAST,
|
||||
} rocprofiler_buffer_tracing_kind_t;
|
||||
|
||||
|
||||
@@ -22,9 +22,6 @@
|
||||
|
||||
#include "lib/rocprofiler-sdk/buffer.hpp"
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include "lib/common/container/stable_vector.hpp"
|
||||
#include "lib/common/static_object.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
@@ -34,6 +31,11 @@
|
||||
#include "lib/rocprofiler-sdk/internal_threading.hpp"
|
||||
#include "lib/rocprofiler-sdk/registration.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include <glog/logging.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <exception>
|
||||
#include <mutex>
|
||||
@@ -121,7 +123,7 @@ allocate_buffer()
|
||||
|
||||
// create an entry in the registered
|
||||
auto& _cfg_v = CHECK_NOTNULL(get_buffers())->back();
|
||||
_cfg_v = allocator::make_unique_static<buffer::instance>();
|
||||
_cfg_v = std::make_unique<buffer::instance>();
|
||||
auto* _cfg = _cfg_v.get();
|
||||
|
||||
if(!_cfg) return std::nullopt;
|
||||
@@ -150,6 +152,8 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait)
|
||||
|
||||
auto* buff = get_buffer(buffer_id);
|
||||
|
||||
if(!buff) return ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND;
|
||||
|
||||
auto* task_group =
|
||||
internal_threading::get_task_group(rocprofiler_callback_thread_t{buff->task_group_id});
|
||||
|
||||
@@ -211,11 +215,10 @@ flush(rocprofiler_buffer_id_t buffer_id, bool wait)
|
||||
buff_v->syncer.clear();
|
||||
};
|
||||
|
||||
task_group->exec(_task);
|
||||
task_group->exec(std::move(_task));
|
||||
if(wait)
|
||||
{
|
||||
while(task_group->size() > 0)
|
||||
task_group->join();
|
||||
task_group->join();
|
||||
}
|
||||
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
@@ -283,6 +286,8 @@ rocprofiler_destroy_buffer(rocprofiler_buffer_id_t buffer_id)
|
||||
auto* buffers = CHECK_NOTNULL(rocprofiler::buffer::get_buffers());
|
||||
auto& buff = buffers->at(buffer_id.handle - offset);
|
||||
|
||||
if(!buff) return ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND;
|
||||
|
||||
// buffer is currently being flushed or destroyed
|
||||
if(buff->syncer.test_and_set()) return ROCPROFILER_STATUS_ERROR_BUFFER_BUSY;
|
||||
|
||||
@@ -290,6 +295,7 @@ rocprofiler_destroy_buffer(rocprofiler_buffer_id_t buffer_id)
|
||||
itr.reset();
|
||||
|
||||
buff->syncer.clear();
|
||||
buff.reset();
|
||||
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
#include "lib/common/container/record_header_buffer.hpp"
|
||||
#include "lib/common/container/stable_vector.hpp"
|
||||
#include "lib/common/demangle.hpp"
|
||||
#include "lib/rocprofiler-sdk/allocator.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
@@ -56,14 +55,13 @@ struct instance
|
||||
rocprofiler_buffer_policy_t policy = ROCPROFILER_BUFFER_POLICY_NONE;
|
||||
|
||||
template <typename Tp>
|
||||
void emplace(uint32_t, uint32_t, Tp&);
|
||||
bool emplace(uint32_t, uint32_t, Tp&);
|
||||
|
||||
buffer_t& get_internal_buffer();
|
||||
buffer_t& get_internal_buffer(size_t);
|
||||
};
|
||||
|
||||
using unique_buffer_vec_t =
|
||||
common::container::stable_vector<allocator::unique_static_ptr_t<instance>, 4>;
|
||||
using unique_buffer_vec_t = common::container::stable_vector<std::unique_ptr<instance>, 4>;
|
||||
|
||||
bool
|
||||
is_valid_buffer_id(rocprofiler_buffer_id_t id);
|
||||
@@ -114,14 +112,15 @@ rocprofiler::buffer::flush(uint64_t buffer_idx, bool wait)
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
inline void
|
||||
inline bool
|
||||
rocprofiler::buffer::instance::emplace(uint32_t category, uint32_t kind, Tp& value)
|
||||
{
|
||||
// get the index of the current buffer
|
||||
auto get_idx = [this]() { return buffer_idx.load(std::memory_order_acquire) % buffers.size(); };
|
||||
|
||||
auto idx = get_idx();
|
||||
if(!buffers.at(idx).emplace(category, kind, value))
|
||||
auto idx = get_idx();
|
||||
auto success = buffers.at(idx).emplace(category, kind, value);
|
||||
if(!success)
|
||||
{
|
||||
if(buffers.at(idx).capacity() < sizeof(value))
|
||||
{
|
||||
@@ -135,10 +134,9 @@ rocprofiler::buffer::instance::emplace(uint32_t category, uint32_t kind, Tp& val
|
||||
if(policy == ROCPROFILER_BUFFER_POLICY_LOSSLESS)
|
||||
{
|
||||
// blocks until buffer is flushed
|
||||
bool success = false;
|
||||
while(!success)
|
||||
{
|
||||
buffer::flush(buffer_id, true);
|
||||
buffer::flush(buffer_id, false);
|
||||
idx = get_idx();
|
||||
success = buffers.at(idx).emplace(category, kind, value);
|
||||
}
|
||||
@@ -154,4 +152,6 @@ rocprofiler::buffer::instance::emplace(uint32_t category, uint32_t kind, Tp& val
|
||||
// flush without syncing
|
||||
buffer::flush(buffer_id, false);
|
||||
}
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
@@ -77,6 +77,7 @@ ROCPROFILER_BUFFER_TRACING_KIND_STRING(MEMORY_COPY)
|
||||
ROCPROFILER_BUFFER_TRACING_KIND_STRING(KERNEL_DISPATCH)
|
||||
ROCPROFILER_BUFFER_TRACING_KIND_STRING(PAGE_MIGRATION)
|
||||
ROCPROFILER_BUFFER_TRACING_KIND_STRING(SCRATCH_MEMORY)
|
||||
ROCPROFILER_BUFFER_TRACING_KIND_STRING(CORRELATION_ID_RETIREMENT)
|
||||
|
||||
template <size_t Idx, size_t... Tail>
|
||||
std::pair<const char*, size_t>
|
||||
@@ -222,6 +223,7 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_
|
||||
case ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH:
|
||||
case ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION:
|
||||
case ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY:
|
||||
case ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT:
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
@@ -320,6 +322,7 @@ rocprofiler_iterate_buffer_tracing_kind_operations(
|
||||
case ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH:
|
||||
case ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION:
|
||||
case ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY:
|
||||
case ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT:
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include "lib/common/container/small_vector.hpp"
|
||||
#include "lib/common/container/stable_vector.hpp"
|
||||
#include "lib/common/static_object.hpp"
|
||||
#include "lib/common/synchronized.hpp"
|
||||
@@ -118,10 +119,10 @@ get_correlation_id_map()
|
||||
return _v;
|
||||
}
|
||||
|
||||
auto*&
|
||||
auto&
|
||||
get_latest_correlation_id_impl()
|
||||
{
|
||||
static thread_local correlation_id* _v = nullptr;
|
||||
static thread_local auto _v = common::container::small_vector<correlation_id*, 16>{};
|
||||
return _v;
|
||||
}
|
||||
|
||||
@@ -133,6 +134,53 @@ get_unique_internal_id()
|
||||
}
|
||||
} // namespace
|
||||
|
||||
uint32_t
|
||||
correlation_id::add_ref_count()
|
||||
{
|
||||
return m_ref_count.fetch_add(1);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
correlation_id::sub_ref_count()
|
||||
{
|
||||
auto _ret = m_ref_count.fetch_sub(1);
|
||||
|
||||
LOG_IF(FATAL, _ret == 0) << "correlation id underflow";
|
||||
|
||||
if(_ret == 1)
|
||||
{
|
||||
auto ctxs = get_active_contexts([](const context* ctx) {
|
||||
return (ctx->buffered_tracer &&
|
||||
(ctx->buffered_tracer->domains(
|
||||
ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT)));
|
||||
});
|
||||
|
||||
auto record = rocprofiler_buffer_tracing_correlation_id_retirement_record_t{
|
||||
.size = sizeof(rocprofiler_buffer_tracing_correlation_id_retirement_record_t),
|
||||
.kind = ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT,
|
||||
.timestamp = common::timestamp_ns(),
|
||||
.internal_correlation_id = internal};
|
||||
|
||||
if(!ctxs.empty())
|
||||
{
|
||||
for(const auto* itr : ctxs)
|
||||
{
|
||||
auto* _buffer = buffer::get_buffer(itr->buffered_tracer->buffer_data.at(
|
||||
ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT));
|
||||
|
||||
auto success = CHECK_NOTNULL(_buffer)->emplace(
|
||||
ROCPROFILER_BUFFER_CATEGORY_TRACING,
|
||||
ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT,
|
||||
record);
|
||||
|
||||
LOG_IF(FATAL, !success) << "failed to emplace correlation id retirement";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return _ret;
|
||||
}
|
||||
|
||||
correlation_id*
|
||||
correlation_tracing_service::construct(uint32_t _init_ref_count)
|
||||
{
|
||||
@@ -142,7 +190,7 @@ correlation_tracing_service::construct(uint32_t _init_ref_count)
|
||||
auto& ret = corr_id_map->wlock([](auto& data) -> auto& { return data.emplace_back(); });
|
||||
ret = std::make_unique<correlation_id>(_init_ref_count, common::get_tid(), _internal_id);
|
||||
|
||||
get_latest_correlation_id_impl() = ret.get();
|
||||
get_latest_correlation_id_impl().emplace_back(ret.get());
|
||||
|
||||
return ret.get();
|
||||
}
|
||||
@@ -150,13 +198,33 @@ correlation_tracing_service::construct(uint32_t _init_ref_count)
|
||||
correlation_id*
|
||||
get_latest_correlation_id()
|
||||
{
|
||||
return get_latest_correlation_id_impl();
|
||||
return (get_latest_correlation_id_impl().empty()) ? nullptr
|
||||
: get_latest_correlation_id_impl().back();
|
||||
}
|
||||
|
||||
void
|
||||
pop_latest_correlation_id(const correlation_id* val)
|
||||
const correlation_id*
|
||||
pop_latest_correlation_id(correlation_id* val)
|
||||
{
|
||||
if(get_latest_correlation_id_impl() == val) get_latest_correlation_id_impl() = nullptr;
|
||||
if(!val)
|
||||
{
|
||||
LOG(ERROR) << "passed nullptr to correlation id";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if(get_latest_correlation_id_impl().empty())
|
||||
{
|
||||
LOG(ERROR) << "empty thread-local correlation id stack";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
LOG_IF(ERROR, get_latest_correlation_id_impl().back() != val)
|
||||
<< "pop_latest_correlation_id is happening out of order for " << val->internal
|
||||
<< ". top of stack is " << get_latest_correlation_id_impl().back()->internal;
|
||||
|
||||
get_latest_correlation_id_impl().pop_back();
|
||||
|
||||
return (get_latest_correlation_id_impl().empty()) ? nullptr
|
||||
: get_latest_correlation_id_impl().back();
|
||||
}
|
||||
|
||||
context_array_t&
|
||||
|
||||
@@ -59,9 +59,9 @@ struct correlation_id
|
||||
// thus, after two HSA buffer flushes, we will have received all the PC samples for
|
||||
// the
|
||||
correlation_id(uint32_t _cnt, rocprofiler_thread_id_t _tid, uint64_t _internal) noexcept
|
||||
: ref_count{_cnt}
|
||||
, thread_idx{_tid}
|
||||
: thread_idx{_tid}
|
||||
, internal{_internal}
|
||||
, m_ref_count{_cnt}
|
||||
{}
|
||||
|
||||
correlation_id() = default;
|
||||
@@ -72,9 +72,15 @@ struct correlation_id
|
||||
correlation_id& operator=(const correlation_id&) = delete;
|
||||
correlation_id& operator=(correlation_id&&) noexcept = delete;
|
||||
|
||||
std::atomic<uint32_t> ref_count = {};
|
||||
rocprofiler_thread_id_t thread_idx = 0;
|
||||
uint64_t internal = 0;
|
||||
|
||||
uint32_t get_ref_count() const { return m_ref_count.load(); }
|
||||
uint32_t add_ref_count();
|
||||
uint32_t sub_ref_count();
|
||||
|
||||
private:
|
||||
std::atomic<uint32_t> m_ref_count = {};
|
||||
};
|
||||
|
||||
correlation_id*
|
||||
@@ -84,8 +90,8 @@ get_correlation_id(rocprofiler_thread_id_t tid, uint64_t internal_id);
|
||||
correlation_id*
|
||||
get_latest_correlation_id();
|
||||
|
||||
void
|
||||
pop_latest_correlation_id(const correlation_id*);
|
||||
const correlation_id*
|
||||
pop_latest_correlation_id(correlation_id*);
|
||||
|
||||
/// permits tools opportunity to modify the correlation id based on the domain, op, and
|
||||
/// the rocprofiler generated correlation id
|
||||
|
||||
@@ -308,7 +308,7 @@ hip_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
|
||||
}
|
||||
|
||||
// decrement the reference count before invoking
|
||||
corr_id->ref_count.fetch_sub(1);
|
||||
corr_id->sub_ref_count();
|
||||
|
||||
auto _ret = exec(info_type::get_table_func(), std::forward<Args>(args)...);
|
||||
|
||||
@@ -360,7 +360,7 @@ hip_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
|
||||
}
|
||||
|
||||
// decrement the reference count after usage in the callback/buffers
|
||||
corr_id->ref_count.fetch_sub(1);
|
||||
corr_id->sub_ref_count();
|
||||
|
||||
context::pop_latest_correlation_id(corr_id);
|
||||
|
||||
@@ -454,6 +454,7 @@ iterate_args(const uint32_t id,
|
||||
user_data);
|
||||
if(ret != 0) break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
if constexpr(sizeof...(OpIdxTail) > 0)
|
||||
iterate_args<TableIdx>(id, data, func, user_data, std::index_sequence<OpIdxTail...>{});
|
||||
@@ -485,7 +486,7 @@ should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain,
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx>
|
||||
void
|
||||
copy_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
|
||||
copy_table(Tp* _orig, uint64_t _tbl_instance, std::integral_constant<size_t, OpIdx>)
|
||||
{
|
||||
using table_type = typename hip_table_lookup<TableIdx>::type;
|
||||
|
||||
@@ -493,24 +494,34 @@ copy_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
|
||||
{
|
||||
auto _info = hip_api_info<TableIdx, OpIdx>{};
|
||||
|
||||
LOG(INFO) << "copying table entry for " << _info.name;
|
||||
|
||||
// make sure we don't access a field that doesn't exist in input table
|
||||
if(_info.offset() >= _orig->size) return;
|
||||
|
||||
// 1. get the sub-table containing the function pointer in original table
|
||||
// 2. get reference to function pointer in sub-table in original table
|
||||
auto& _table = _info.get_table(_orig);
|
||||
auto& _func = _info.get_table_func(_table);
|
||||
auto& _orig_table = _info.get_table(_orig);
|
||||
auto& _orig_func = _info.get_table_func(_orig_table);
|
||||
// 3. get the sub-table containing the function pointer in saved table
|
||||
// 4. get reference to function pointer in sub-table in saved table
|
||||
// 5. save the original function in the saved table
|
||||
auto& _saved = _info.get_table(get_table());
|
||||
auto& _ofunc = _info.get_table_func(_saved);
|
||||
_ofunc = _func;
|
||||
}
|
||||
auto& _copy_table = _info.get_table(get_table());
|
||||
auto& _copy_func = _info.get_table_func(_copy_table);
|
||||
|
||||
(void) _orig;
|
||||
LOG_IF(FATAL, _copy_func && _tbl_instance == 0)
|
||||
<< _info.name << " has non-null function pointer " << _copy_func
|
||||
<< " despite this being the first instance of the library being copies";
|
||||
|
||||
if(!_copy_func)
|
||||
{
|
||||
LOG(INFO) << "copying table entry for " << _info.name;
|
||||
_copy_func = _orig_func;
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG(INFO) << "skipping copying table entry for " << _info.name
|
||||
<< " from table instance " << _tbl_instance;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx>
|
||||
@@ -540,17 +551,15 @@ update_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
|
||||
auto& _func = _info.get_table_func(_table);
|
||||
_func = _info.get_functor(_func);
|
||||
}
|
||||
|
||||
(void) _orig;
|
||||
}
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx, size_t... OpIdxTail>
|
||||
void
|
||||
copy_table(Tp* _orig, std::index_sequence<OpIdx, OpIdxTail...>)
|
||||
copy_table(Tp* _orig, uint64_t _tbl_instance, std::index_sequence<OpIdx, OpIdxTail...>)
|
||||
{
|
||||
copy_table<TableIdx>(_orig, std::integral_constant<size_t, OpIdx>{});
|
||||
copy_table<TableIdx>(_orig, _tbl_instance, std::integral_constant<size_t, OpIdx>{});
|
||||
if constexpr(sizeof...(OpIdxTail) > 0)
|
||||
copy_table<TableIdx>(_orig, std::index_sequence<OpIdxTail...>{});
|
||||
copy_table<TableIdx>(_orig, _tbl_instance, std::index_sequence<OpIdxTail...>{});
|
||||
}
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx, size_t... OpIdxTail>
|
||||
@@ -617,11 +626,12 @@ iterate_args(uint32_t id,
|
||||
|
||||
template <typename TableT>
|
||||
void
|
||||
copy_table(TableT* _orig)
|
||||
copy_table(TableT* _orig, uint64_t _tbl_instance)
|
||||
{
|
||||
constexpr auto TableIdx = hip_table_id_lookup<TableT>::value;
|
||||
if(_orig)
|
||||
copy_table<TableIdx>(_orig, std::make_index_sequence<hip_domain_info<TableIdx>::last>{});
|
||||
copy_table<TableIdx>(
|
||||
_orig, _tbl_instance, std::make_index_sequence<hip_domain_info<TableIdx>::last>{});
|
||||
}
|
||||
|
||||
template <typename TableT>
|
||||
@@ -637,7 +647,7 @@ using hip_api_data_t = rocprofiler_callback_tracing_hip_api_data_t;
|
||||
using hip_op_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t;
|
||||
|
||||
#define INSTANTIATE_HIP_TABLE_FUNC(TABLE_TYPE, TABLE_IDX) \
|
||||
template void copy_table<TABLE_TYPE>(TABLE_TYPE * _tbl); \
|
||||
template void copy_table<TABLE_TYPE>(TABLE_TYPE * _tbl, uint64_t _instv); \
|
||||
template void update_table<TABLE_TYPE>(TABLE_TYPE * _tbl); \
|
||||
template const char* name_by_id<TABLE_IDX>(uint32_t); \
|
||||
template uint32_t id_by_name<TABLE_IDX>(const char*); \
|
||||
|
||||
@@ -103,7 +103,7 @@ iterate_args(uint32_t id,
|
||||
|
||||
template <typename TableT>
|
||||
void
|
||||
copy_table(TableT* _orig);
|
||||
copy_table(TableT* _orig, uint64_t _tbl_instance);
|
||||
|
||||
template <typename TableT>
|
||||
void
|
||||
|
||||
@@ -249,8 +249,6 @@ async_copy_handler(hsa_signal_value_t signal_value, void* arg)
|
||||
}
|
||||
}
|
||||
|
||||
if(_corr_id) _corr_id->ref_count.fetch_sub(1);
|
||||
|
||||
auto* orig_amd_signal = convert_hsa_handle<amd_signal_t>(_data->orig_signal);
|
||||
|
||||
// Original intercepted signal completion
|
||||
@@ -277,6 +275,8 @@ async_copy_handler(hsa_signal_value_t signal_value, void* arg)
|
||||
get_active_signals()->fetch_sub(1);
|
||||
}
|
||||
|
||||
if(_corr_id) _corr_id->sub_ref_count();
|
||||
|
||||
return (signal_value > 0);
|
||||
}
|
||||
|
||||
@@ -487,7 +487,7 @@ async_copy_impl(Args... args)
|
||||
for(const auto* ctx : _data->contexts)
|
||||
extern_corr_ids.emplace(ctx,
|
||||
ctx->correlation_tracer.external_correlator.get(_data->tid));
|
||||
_data->correlation_id->ref_count.fetch_add(1);
|
||||
_data->correlation_id->add_ref_count();
|
||||
}
|
||||
|
||||
_data->orig_signal = _completion_signal;
|
||||
@@ -505,27 +505,46 @@ auto get_async_copy_impl(RetT (*)(Args...))
|
||||
|
||||
template <size_t TableIdx, size_t OpIdx>
|
||||
void
|
||||
async_copy_save(hsa_amd_ext_table_t* _orig)
|
||||
async_copy_save(hsa_amd_ext_table_t* _orig, uint64_t _tbl_instance)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<hsa_amd_ext_table_t, typename hsa_table_lookup<TableIdx>::type>::value,
|
||||
"unexpected type");
|
||||
|
||||
auto _meta = hsa_api_meta<TableIdx, OpIdx>{};
|
||||
auto& _table = _meta.get_table(_orig);
|
||||
auto& _func = _meta.get_table_func(_table);
|
||||
get_next_dispatch<TableIdx, OpIdx>() = _func;
|
||||
auto _meta = hsa_api_meta<TableIdx, OpIdx>{};
|
||||
|
||||
// original table and function
|
||||
auto& _orig_table = _meta.get_table(_orig);
|
||||
auto& _orig_func = _meta.get_table_func(_orig_table);
|
||||
|
||||
// table with copy function
|
||||
auto& _copy_func = get_next_dispatch<TableIdx, OpIdx>();
|
||||
|
||||
LOG_IF(FATAL, _copy_func && _tbl_instance == 0)
|
||||
<< _meta.name << " has non-null function pointer " << _copy_func
|
||||
<< " despite this being the first instance of the library being copies";
|
||||
|
||||
if(!_copy_func)
|
||||
{
|
||||
LOG(INFO) << "copying table entry for " << _meta.name;
|
||||
_copy_func = _orig_func;
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG(INFO) << "skipping copying table entry for " << _meta.name << " from table instance "
|
||||
<< _tbl_instance;
|
||||
}
|
||||
}
|
||||
|
||||
template <size_t TableIdx, size_t... OpIdx>
|
||||
void
|
||||
async_copy_save(hsa_amd_ext_table_t* _orig, std::index_sequence<OpIdx...>)
|
||||
async_copy_save(hsa_amd_ext_table_t* _orig, uint64_t _tbl_instance, std::index_sequence<OpIdx...>)
|
||||
{
|
||||
static_assert(
|
||||
std::is_same<hsa_amd_ext_table_t, typename hsa_table_lookup<TableIdx>::type>::value,
|
||||
"unexpected type");
|
||||
|
||||
(async_copy_save<TableIdx, OpIdx>(_orig), ...);
|
||||
(async_copy_save<TableIdx, OpIdx>(_orig, _tbl_instance), ...);
|
||||
}
|
||||
|
||||
template <size_t TableIdx, size_t OpIdx>
|
||||
@@ -594,12 +613,12 @@ get_names()
|
||||
} // namespace async_copy
|
||||
|
||||
void
|
||||
async_copy_init(hsa_api_table_t* _orig)
|
||||
async_copy_init(hsa_api_table_t* _orig, uint64_t _tbl_instance)
|
||||
{
|
||||
if(_orig && _orig->amd_ext_)
|
||||
{
|
||||
async_copy::async_copy_save<ROCPROFILER_HSA_TABLE_ID_AmdExt>(
|
||||
_orig->amd_ext_, async_copy::async_copy_index_seq_t{});
|
||||
_orig->amd_ext_, _tbl_instance, async_copy::async_copy_index_seq_t{});
|
||||
|
||||
auto ctxs = context::get_registered_contexts(async_copy::context_filter);
|
||||
if(!ctxs.empty())
|
||||
|
||||
@@ -46,7 +46,7 @@ get_ids();
|
||||
} // namespace async_copy
|
||||
|
||||
void
|
||||
async_copy_init(hsa_api_table_t* _orig);
|
||||
async_copy_init(hsa_api_table_t* _orig, uint64_t _tbl_instance);
|
||||
|
||||
void
|
||||
async_copy_fini();
|
||||
|
||||
@@ -35,6 +35,8 @@
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/hsa/api_id.h>
|
||||
#include <rocprofiler-sdk/hsa/core_api_id.h>
|
||||
#include <rocprofiler-sdk/hsa/table_id.h>
|
||||
|
||||
#include <glog/logging.h>
|
||||
|
||||
@@ -364,7 +366,7 @@ hsa_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
|
||||
}
|
||||
|
||||
// decrement the reference count before invoking
|
||||
corr_id->ref_count.fetch_sub(1);
|
||||
corr_id->sub_ref_count();
|
||||
|
||||
auto _ret = exec(info_type::get_table_func(), std::forward<Args>(args)...);
|
||||
|
||||
@@ -416,7 +418,7 @@ hsa_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
|
||||
}
|
||||
|
||||
// decrement the reference count after usage in the callback/buffers
|
||||
corr_id->ref_count.fetch_sub(1);
|
||||
corr_id->sub_ref_count();
|
||||
|
||||
context::pop_latest_correlation_id(corr_id);
|
||||
|
||||
@@ -510,6 +512,7 @@ iterate_args(const uint32_t id,
|
||||
user_data);
|
||||
if(ret != 0) break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
if constexpr(sizeof...(IdxTail) > 0)
|
||||
iterate_args<TableIdx>(id, data, func, user_data, std::index_sequence<IdxTail...>{});
|
||||
@@ -542,7 +545,7 @@ should_wrap_functor(const context::context_array_t& _contexts,
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx>
|
||||
void
|
||||
copy_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
|
||||
copy_table(Tp* _orig, uint64_t _tbl_instance, std::integral_constant<size_t, OpIdx>)
|
||||
{
|
||||
using table_type = typename hsa_table_lookup<TableIdx>::type;
|
||||
|
||||
@@ -550,24 +553,34 @@ copy_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
|
||||
{
|
||||
auto _info = hsa_api_info<TableIdx, OpIdx>{};
|
||||
|
||||
LOG(INFO) << "copying table entry for " << _info.name;
|
||||
|
||||
// make sure we don't access a field that doesn't exist in input table
|
||||
if(_info.offset() >= _orig->version.minor_id) return;
|
||||
|
||||
// 1. get the sub-table containing the function pointer in original table
|
||||
// 2. get reference to function pointer in sub-table in original table
|
||||
auto& _table = _info.get_table(_orig);
|
||||
auto& _func = _info.get_table_func(_table);
|
||||
auto& _orig_table = _info.get_table(_orig);
|
||||
auto& _orig_func = _info.get_table_func(_orig_table);
|
||||
// 3. get the sub-table containing the function pointer in saved table
|
||||
// 4. get reference to function pointer in sub-table in saved table
|
||||
// 5. save the original function in the saved table
|
||||
auto& _saved = _info.get_table(hsa_table_lookup<TableIdx>{}());
|
||||
auto& _ofunc = _info.get_table_func(_saved);
|
||||
_ofunc = _func;
|
||||
}
|
||||
auto& _copy_table = _info.get_table(hsa_table_lookup<TableIdx>{}());
|
||||
auto& _copy_func = _info.get_table_func(_copy_table);
|
||||
|
||||
(void) _orig;
|
||||
LOG_IF(FATAL, _copy_func && _tbl_instance == 0)
|
||||
<< _info.name << " has non-null function pointer " << _copy_func
|
||||
<< " despite this being the first instance of the library being copies";
|
||||
|
||||
if(!_copy_func)
|
||||
{
|
||||
LOG(INFO) << "copying table entry for " << _info.name;
|
||||
_copy_func = _orig_func;
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG(INFO) << "skipping copying table entry for " << _info.name
|
||||
<< " from table instance " << _tbl_instance;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx>
|
||||
@@ -602,17 +615,15 @@ update_table(const context::context_array_t& _contexts,
|
||||
auto& _func = _info.get_table_func(_table);
|
||||
_func = _info.get_functor(_func);
|
||||
}
|
||||
|
||||
(void) _orig;
|
||||
}
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx, size_t... OpIdxTail>
|
||||
void
|
||||
copy_table(Tp* _orig, std::index_sequence<OpIdx, OpIdxTail...>)
|
||||
copy_table(Tp* _orig, uint64_t _tbl_instance, std::index_sequence<OpIdx, OpIdxTail...>)
|
||||
{
|
||||
copy_table<TableIdx>(_orig, std::integral_constant<size_t, OpIdx>{});
|
||||
copy_table<TableIdx>(_orig, _tbl_instance, std::integral_constant<size_t, OpIdx>{});
|
||||
if constexpr(sizeof...(OpIdxTail) > 0)
|
||||
copy_table<TableIdx>(_orig, std::index_sequence<OpIdxTail...>{});
|
||||
copy_table<TableIdx>(_orig, _tbl_instance, std::index_sequence<OpIdxTail...>{});
|
||||
}
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx, size_t... OpIdxTail>
|
||||
@@ -681,11 +692,12 @@ iterate_args(uint32_t id,
|
||||
|
||||
template <typename TableT>
|
||||
void
|
||||
copy_table(TableT* _orig)
|
||||
copy_table(TableT* _orig, uint64_t _tbl_instance)
|
||||
{
|
||||
constexpr auto TableIdx = hsa_table_id_lookup<TableT>::value;
|
||||
if(_orig)
|
||||
copy_table<TableIdx>(_orig, std::make_index_sequence<hsa_domain_info<TableIdx>::last>{});
|
||||
copy_table<TableIdx>(
|
||||
_orig, _tbl_instance, std::make_index_sequence<hsa_domain_info<TableIdx>::last>{});
|
||||
}
|
||||
|
||||
template <typename TableT>
|
||||
@@ -704,8 +716,8 @@ update_table(TableT* _orig)
|
||||
using iterate_args_data_t = rocprofiler_callback_tracing_hsa_api_data_t;
|
||||
using iterate_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t;
|
||||
|
||||
#define INSTANTIATE_MARKER_TABLE_FUNC(TABLE_TYPE, TABLE_IDX) \
|
||||
template void copy_table<TABLE_TYPE>(TABLE_TYPE * _tbl); \
|
||||
#define INSTANTIATE_HSA_TABLE_FUNC(TABLE_TYPE, TABLE_IDX) \
|
||||
template void copy_table<TABLE_TYPE>(TABLE_TYPE * _tbl, uint64_t _instv); \
|
||||
template void update_table<TABLE_TYPE>(TABLE_TYPE * _tbl); \
|
||||
template const char* name_by_id<TABLE_IDX>(uint32_t); \
|
||||
template uint32_t id_by_name<TABLE_IDX>(const char*); \
|
||||
@@ -714,11 +726,11 @@ using iterate_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t;
|
||||
template void iterate_args<TABLE_IDX>( \
|
||||
uint32_t, const iterate_args_data_t&, iterate_args_cb_t, void*);
|
||||
|
||||
INSTANTIATE_MARKER_TABLE_FUNC(hsa_core_table_t, ROCPROFILER_HSA_TABLE_ID_Core)
|
||||
INSTANTIATE_MARKER_TABLE_FUNC(hsa_amd_ext_table_t, ROCPROFILER_HSA_TABLE_ID_AmdExt)
|
||||
INSTANTIATE_MARKER_TABLE_FUNC(hsa_img_ext_table_t, ROCPROFILER_HSA_TABLE_ID_ImageExt)
|
||||
INSTANTIATE_MARKER_TABLE_FUNC(hsa_fini_ext_table_t, ROCPROFILER_HSA_TABLE_ID_FinalizeExt)
|
||||
INSTANTIATE_HSA_TABLE_FUNC(hsa_core_table_t, ROCPROFILER_HSA_TABLE_ID_Core)
|
||||
INSTANTIATE_HSA_TABLE_FUNC(hsa_amd_ext_table_t, ROCPROFILER_HSA_TABLE_ID_AmdExt)
|
||||
INSTANTIATE_HSA_TABLE_FUNC(hsa_img_ext_table_t, ROCPROFILER_HSA_TABLE_ID_ImageExt)
|
||||
INSTANTIATE_HSA_TABLE_FUNC(hsa_fini_ext_table_t, ROCPROFILER_HSA_TABLE_ID_FinalizeExt)
|
||||
|
||||
#undef INSTANTIATE_MARKER_TABLE_FUNC
|
||||
#undef INSTANTIATE_HSA_TABLE_FUNC
|
||||
} // namespace hsa
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -125,7 +125,7 @@ iterate_args(uint32_t id,
|
||||
|
||||
template <typename TableT>
|
||||
void
|
||||
copy_table(TableT* _orig);
|
||||
copy_table(TableT* _orig, uint64_t _tbl_instance);
|
||||
|
||||
template <typename TableT>
|
||||
void
|
||||
|
||||
@@ -181,10 +181,10 @@ AsyncSignalHandler(hsa_signal_value_t /*signal_v*/, void* data)
|
||||
|
||||
if(_corr_id)
|
||||
{
|
||||
LOG_IF(FATAL, _corr_id->ref_count.load() == 0)
|
||||
LOG_IF(FATAL, _corr_id->get_ref_count() == 0)
|
||||
<< "reference counter for correlation id " << _corr_id->internal << " from thread "
|
||||
<< _corr_id->thread_idx << " has no reference count";
|
||||
_corr_id->ref_count.fetch_sub(1);
|
||||
_corr_id->sub_ref_count();
|
||||
}
|
||||
|
||||
queue_info_session.queue.async_complete();
|
||||
@@ -257,24 +257,17 @@ WriteInterceptor(const void* packets,
|
||||
auto* corr_id = context::get_latest_correlation_id();
|
||||
|
||||
// use thread-local value to reuse allocation
|
||||
static thread_local auto extern_corr_ids_tl =
|
||||
Queue::queue_info_session_t::external_corr_id_map_t{};
|
||||
auto extern_corr_ids = Queue::queue_info_session_t::external_corr_id_map_t{};
|
||||
|
||||
// increase the reference count to denote that this correlation id is being used in a kernel
|
||||
if(corr_id)
|
||||
{
|
||||
extern_corr_ids_tl.clear(); // clear it so that it only contains the current contexts
|
||||
extern_corr_ids_tl.reserve(ctxs.size()); // reserve for performance
|
||||
extern_corr_ids.clear(); // clear it so that it only contains the current contexts
|
||||
extern_corr_ids.reserve(ctxs.size()); // reserve for performance
|
||||
for(const auto* ctx : ctxs)
|
||||
extern_corr_ids_tl.emplace(ctx,
|
||||
ctx->correlation_tracer.external_correlator.get(thr_id));
|
||||
corr_id->ref_count.fetch_add(1);
|
||||
extern_corr_ids.emplace(ctx, ctx->correlation_tracer.external_correlator.get(thr_id));
|
||||
}
|
||||
|
||||
// move to local variable
|
||||
auto extern_corr_ids = std::move(extern_corr_ids_tl);
|
||||
|
||||
// hsa_ext_amd_aql_pm4_packet_t
|
||||
const auto* packets_arr = static_cast<const rocprofiler_packet*>(packets);
|
||||
auto transformed_packets = std::vector<rocprofiler_packet>{};
|
||||
|
||||
@@ -375,6 +368,8 @@ WriteInterceptor(const void* packets,
|
||||
LOG_IF(FATAL, packet_type != HSA_PACKET_TYPE_KERNEL_DISPATCH)
|
||||
<< "get_kernel_id below might need to be updated";
|
||||
|
||||
if(corr_id) corr_id->add_ref_count();
|
||||
|
||||
// Enqueue the signal into the handler. Will call completed_cb when
|
||||
// signal completes.
|
||||
queue.async_started();
|
||||
|
||||
@@ -191,7 +191,6 @@ roctx_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
|
||||
auto thr_id = common::get_tid();
|
||||
auto callback_contexts = std::vector<callback_context_data>{};
|
||||
auto buffered_contexts = std::vector<buffered_context_data>{};
|
||||
auto has_pc_sampling = false;
|
||||
|
||||
populate_contexts(info_type::callback_domain_idx,
|
||||
info_type::buffered_domain_idx,
|
||||
@@ -208,7 +207,7 @@ roctx_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
|
||||
return 0;
|
||||
}
|
||||
|
||||
auto ref_count = (has_pc_sampling) ? 4 : 2;
|
||||
auto ref_count = 2;
|
||||
auto buffer_record = common::init_public_api_struct(buffer_marker_api_record_t{});
|
||||
auto tracer_data = callback_api_data_t{.size = sizeof(callback_api_data_t)};
|
||||
auto* corr_id = correlation_service::construct(ref_count);
|
||||
@@ -274,7 +273,7 @@ roctx_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
|
||||
}
|
||||
|
||||
// decrement the reference count before invoking
|
||||
corr_id->ref_count.fetch_sub(1);
|
||||
corr_id->sub_ref_count();
|
||||
|
||||
auto _ret = exec(info_type::get_table_func(), std::forward<Args>(args)...);
|
||||
|
||||
@@ -326,7 +325,7 @@ roctx_api_impl<TableIdx, OpIdx>::functor(Args&&... args)
|
||||
}
|
||||
|
||||
// decrement the reference count after usage in the callback/buffers
|
||||
corr_id->ref_count.fetch_sub(1);
|
||||
corr_id->sub_ref_count();
|
||||
|
||||
context::pop_latest_correlation_id(corr_id);
|
||||
|
||||
@@ -420,6 +419,7 @@ iterate_args(const uint32_t id,
|
||||
user_data);
|
||||
if(ret != 0) break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
if constexpr(sizeof...(OpIdxTail) > 0)
|
||||
iterate_args<TableIdx>(id, data, func, user_data, std::index_sequence<OpIdxTail...>{});
|
||||
@@ -451,7 +451,7 @@ should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain,
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx>
|
||||
void
|
||||
copy_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
|
||||
copy_table(Tp* _orig, uint64_t _tbl_instance, std::integral_constant<size_t, OpIdx>)
|
||||
{
|
||||
using table_type = typename roctx_table_lookup<TableIdx>::type;
|
||||
|
||||
@@ -459,24 +459,34 @@ copy_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
|
||||
{
|
||||
auto _info = roctx_api_info<TableIdx, OpIdx>{};
|
||||
|
||||
LOG(INFO) << "copying table entry for " << _info.name;
|
||||
|
||||
// make sure we don't access a field that doesn't exist in input table
|
||||
if(_info.offset() >= _orig->size) return;
|
||||
|
||||
// 1. get the sub-table containing the function pointer in original table
|
||||
// 2. get reference to function pointer in sub-table in original table
|
||||
auto& _table = _info.get_table(_orig);
|
||||
auto& _func = _info.get_table_func(_table);
|
||||
auto& _orig_table = _info.get_table(_orig);
|
||||
auto& _orig_func = _info.get_table_func(_orig_table);
|
||||
// 3. get the sub-table containing the function pointer in saved table
|
||||
// 4. get reference to function pointer in sub-table in saved table
|
||||
// 5. save the original function in the saved table
|
||||
auto& _saved = _info.get_table(*get_table<TableIdx>());
|
||||
auto& _ofunc = _info.get_table_func(_saved);
|
||||
_ofunc = _func;
|
||||
}
|
||||
auto& _copy_table = _info.get_table(*get_table<TableIdx>());
|
||||
auto& _copy_func = _info.get_table_func(_copy_table);
|
||||
|
||||
(void) _orig;
|
||||
LOG_IF(FATAL, _copy_func && _tbl_instance == 0)
|
||||
<< _info.name << " has non-null function pointer " << _copy_func
|
||||
<< " despite this being the first instance of the library being copies";
|
||||
|
||||
if(!_copy_func)
|
||||
{
|
||||
LOG(INFO) << "copying table entry for " << _info.name;
|
||||
_copy_func = _orig_func;
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG(INFO) << "skipping copying table entry for " << _info.name
|
||||
<< " from table instance " << _tbl_instance;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx>
|
||||
@@ -507,17 +517,15 @@ update_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
|
||||
auto& _func = _info.get_table_func(_table);
|
||||
_func = _info.get_functor(_func);
|
||||
}
|
||||
|
||||
(void) _orig;
|
||||
}
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx, size_t... OpIdxTail>
|
||||
void
|
||||
copy_table(Tp* _orig, std::index_sequence<OpIdx, OpIdxTail...>)
|
||||
copy_table(Tp* _orig, uint64_t _tbl_instance, std::index_sequence<OpIdx, OpIdxTail...>)
|
||||
{
|
||||
copy_table<TableIdx>(_orig, std::integral_constant<size_t, OpIdx>{});
|
||||
copy_table<TableIdx>(_orig, _tbl_instance, std::integral_constant<size_t, OpIdx>{});
|
||||
if constexpr(sizeof...(OpIdxTail) > 0)
|
||||
copy_table<TableIdx>(_orig, std::index_sequence<OpIdxTail...>{});
|
||||
copy_table<TableIdx>(_orig, _tbl_instance, std::index_sequence<OpIdxTail...>{});
|
||||
}
|
||||
|
||||
template <size_t TableIdx, typename Tp, size_t OpIdx, size_t... OpIdxTail>
|
||||
@@ -585,11 +593,12 @@ iterate_args(uint32_t id,
|
||||
|
||||
template <typename TableT>
|
||||
void
|
||||
copy_table(TableT* _orig)
|
||||
copy_table(TableT* _orig, uint64_t _tbl_instance)
|
||||
{
|
||||
constexpr auto TableIdx = roctx_table_id_lookup<TableT>::value;
|
||||
if(_orig)
|
||||
copy_table<TableIdx>(_orig, std::make_index_sequence<roctx_domain_info<TableIdx>::last>{});
|
||||
copy_table<TableIdx>(
|
||||
_orig, _tbl_instance, std::make_index_sequence<roctx_domain_info<TableIdx>::last>{});
|
||||
}
|
||||
|
||||
template <typename TableT>
|
||||
@@ -606,7 +615,7 @@ using iterate_args_data_t = rocprofiler_callback_tracing_marker_api_data_t;
|
||||
using iterate_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t;
|
||||
|
||||
#define INSTANTIATE_MARKER_TABLE_FUNC(TABLE_TYPE, TABLE_IDX) \
|
||||
template void copy_table<TABLE_TYPE>(TABLE_TYPE * _tbl); \
|
||||
template void copy_table<TABLE_TYPE>(TABLE_TYPE * _tbl, uint64_t _instv); \
|
||||
template void update_table<TABLE_TYPE>(TABLE_TYPE * _tbl); \
|
||||
template const char* name_by_id<TABLE_IDX>(uint32_t); \
|
||||
template uint32_t id_by_name<TABLE_IDX>(const char*); \
|
||||
|
||||
@@ -91,7 +91,7 @@ iterate_args(uint32_t id,
|
||||
|
||||
template <typename TableT>
|
||||
void
|
||||
copy_table(TableT* _orig);
|
||||
copy_table(TableT* _orig, uint64_t _tbl_instance);
|
||||
|
||||
template <typename TableT>
|
||||
void
|
||||
|
||||
@@ -652,7 +652,7 @@ rocprofiler_set_api_table(const char* name,
|
||||
// any internal modifications to the HipDispatchTable need to be done before we make the
|
||||
// copy or else those modifications will be lost when HIP API tracing is enabled
|
||||
// because the HIP API tracing invokes the function pointers from the copy below
|
||||
rocprofiler::hip::copy_table(hip_runtime_api_table);
|
||||
rocprofiler::hip::copy_table(hip_runtime_api_table, lib_instance);
|
||||
|
||||
// install rocprofiler API wrappers
|
||||
rocprofiler::hip::update_table(hip_runtime_api_table);
|
||||
@@ -675,7 +675,7 @@ rocprofiler_set_api_table(const char* name,
|
||||
// any internal modifications to the HipCompilerDispatchTable need to be done before we make
|
||||
// the copy or else those modifications will be lost when HIP API tracing is enabled because
|
||||
// the HIP API tracing invokes the function pointers from the copy below
|
||||
rocprofiler::hip::copy_table(hip_compiler_api_table);
|
||||
rocprofiler::hip::copy_table(hip_compiler_api_table, lib_instance);
|
||||
|
||||
// install rocprofiler API wrappers
|
||||
rocprofiler::hip::update_table(hip_compiler_api_table);
|
||||
@@ -703,15 +703,15 @@ rocprofiler_set_api_table(const char* name,
|
||||
rocprofiler::agent::construct_agent_cache(hsa_api_table);
|
||||
rocprofiler::hsa::queue_controller_init(hsa_api_table);
|
||||
rocprofiler::hsa::code_object_init(hsa_api_table);
|
||||
rocprofiler::hsa::async_copy_init(hsa_api_table);
|
||||
rocprofiler::hsa::async_copy_init(hsa_api_table, lib_instance);
|
||||
|
||||
// any internal modifications to the HsaApiTable need to be done before we make the
|
||||
// copy or else those modifications will be lost when HSA API tracing is enabled
|
||||
// because the HSA API tracing invokes the function pointers from the copy below
|
||||
rocprofiler::hsa::copy_table(hsa_api_table->core_);
|
||||
rocprofiler::hsa::copy_table(hsa_api_table->amd_ext_);
|
||||
rocprofiler::hsa::copy_table(hsa_api_table->image_ext_);
|
||||
rocprofiler::hsa::copy_table(hsa_api_table->finalizer_ext_);
|
||||
rocprofiler::hsa::copy_table(hsa_api_table->core_, lib_instance);
|
||||
rocprofiler::hsa::copy_table(hsa_api_table->amd_ext_, lib_instance);
|
||||
rocprofiler::hsa::copy_table(hsa_api_table->image_ext_, lib_instance);
|
||||
rocprofiler::hsa::copy_table(hsa_api_table->finalizer_ext_, lib_instance);
|
||||
|
||||
// install rocprofiler API wrappers
|
||||
rocprofiler::hsa::update_table(hsa_api_table->core_);
|
||||
@@ -736,9 +736,9 @@ rocprofiler_set_api_table(const char* name,
|
||||
// any internal modifications to the roctxApiTable_t need to be done before we make
|
||||
// the copy or else those modifications will be lost when ROCTx tracing is enabled because
|
||||
// the ROCTx tracing invokes the function pointers from the copy below
|
||||
rocprofiler::marker::copy_table(roctx_core);
|
||||
rocprofiler::marker::copy_table(roctx_ctrl);
|
||||
rocprofiler::marker::copy_table(roctx_name);
|
||||
rocprofiler::marker::copy_table(roctx_core, lib_instance);
|
||||
rocprofiler::marker::copy_table(roctx_ctrl, lib_instance);
|
||||
rocprofiler::marker::copy_table(roctx_name, lib_instance);
|
||||
|
||||
// install rocprofiler API wrappers
|
||||
rocprofiler::marker::update_table(roctx_core);
|
||||
|
||||
@@ -44,6 +44,7 @@ def test_data_structure(input_data):
|
||||
node_exists("hsa_api_traces", sdk_data["buffer_records"])
|
||||
node_exists("hip_api_traces", sdk_data["buffer_records"], 0)
|
||||
node_exists("marker_api_traces", sdk_data["buffer_records"])
|
||||
node_exists("retired_correlation_ids", sdk_data["buffer_records"])
|
||||
|
||||
|
||||
def test_timestamps(input_data):
|
||||
@@ -189,6 +190,50 @@ def test_async_copy_direction(input_data):
|
||||
assert async_dir_cnt[4] == 0
|
||||
|
||||
|
||||
def test_retired_correlation_ids(input_data):
|
||||
data = input_data
|
||||
sdk_data = data["rocprofiler-sdk-json-tool"]
|
||||
|
||||
def _sort_dict(inp):
|
||||
return dict(sorted(inp.items()))
|
||||
|
||||
api_corr_ids = {}
|
||||
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
corr_id = itr["correlation_id"]["internal"]
|
||||
assert corr_id not in api_corr_ids.keys()
|
||||
api_corr_ids[corr_id] = itr
|
||||
|
||||
async_corr_ids = {}
|
||||
for titr in ["kernel_dispatches", "memory_copies"]:
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
corr_id = itr["correlation_id"]["internal"]
|
||||
assert corr_id not in async_corr_ids.keys()
|
||||
async_corr_ids[corr_id] = itr
|
||||
|
||||
retired_corr_ids = {}
|
||||
for itr in sdk_data["buffer_records"]["retired_correlation_ids"]:
|
||||
corr_id = itr["internal_correlation_id"]
|
||||
assert corr_id not in retired_corr_ids.keys()
|
||||
retired_corr_ids[corr_id] = itr
|
||||
|
||||
api_corr_ids = _sort_dict(api_corr_ids)
|
||||
async_corr_ids = _sort_dict(async_corr_ids)
|
||||
retired_corr_ids = _sort_dict(retired_corr_ids)
|
||||
|
||||
for cid, itr in async_corr_ids.items():
|
||||
assert cid in retired_corr_ids.keys()
|
||||
ts = retired_corr_ids[cid]["timestamp"]
|
||||
assert ts > itr["end_timestamp"]
|
||||
|
||||
for cid, itr in api_corr_ids.items():
|
||||
assert cid in retired_corr_ids.keys()
|
||||
ts = retired_corr_ids[cid]["timestamp"]
|
||||
assert ts > itr["end_timestamp"]
|
||||
|
||||
assert len(api_corr_ids.keys()) == (len(retired_corr_ids.keys()))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit_code = pytest.main(["-x", __file__] + sys.argv[1:])
|
||||
sys.exit(exit_code)
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
# common utilities for tests
|
||||
#
|
||||
|
||||
include(FetchContent)
|
||||
|
||||
set(FETCHCONTENT_BASE_DIR ${PROJECT_BINARY_DIR}/external)
|
||||
|
||||
# build flags
|
||||
add_library(rocprofiler-tests-build-flags INTERFACE)
|
||||
add_library(rocprofiler::tests-build-flags ALIAS rocprofiler-tests-build-flags)
|
||||
@@ -35,10 +39,6 @@ if(NOT TARGET rocprofiler::cereal)
|
||||
INTERFACE $<BUILD_INTERFACE:${ROCPROFILER_SOURCE_DIR}/external/cereal/include>
|
||||
)
|
||||
else()
|
||||
include(FetchContent)
|
||||
|
||||
set(FETCHCONTENT_BASE_DIR ${PROJECT_BINARY_DIR}/external)
|
||||
|
||||
fetchcontent_declare(
|
||||
cereal
|
||||
GIT_REPOSITORY https://github.com/jrmadsen/cereal.git
|
||||
@@ -60,6 +60,32 @@ if(NOT TARGET rocprofiler::cereal)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# perfetto
|
||||
fetchcontent_declare(
|
||||
perfetto
|
||||
GIT_REPOSITORY https://android.googlesource.com/platform/external/perfetto
|
||||
GIT_TAG v42.0
|
||||
SOURCE_DIR ${PROJECT_BINARY_DIR}/external/perfetto BINARY_DIR
|
||||
${PROJECT_BINARY_DIR}/external/build/perfetto-build SUBBUILD_DIR
|
||||
${PROJECT_BINARY_DIR}/external/build/perfetto-subdir)
|
||||
|
||||
fetchcontent_getproperties(perfetto)
|
||||
|
||||
if(NOT perfetto_POPULATED)
|
||||
fetchcontent_populate(perfetto)
|
||||
endif()
|
||||
|
||||
add_library(rocprofiler-tests-perfetto STATIC)
|
||||
add_library(rocprofiler::tests-perfetto ALIAS rocprofiler-tests-perfetto)
|
||||
target_sources(
|
||||
rocprofiler-tests-perfetto
|
||||
PRIVATE ${PROJECT_BINARY_DIR}/external/perfetto/sdk/perfetto.h
|
||||
${PROJECT_BINARY_DIR}/external/perfetto/sdk/perfetto.cc)
|
||||
target_include_directories(
|
||||
rocprofiler-tests-perfetto SYSTEM
|
||||
INTERFACE $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/external/perfetto/sdk>)
|
||||
set_target_properties(rocprofiler-tests-perfetto PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
# common utilities
|
||||
cmake_path(GET CMAKE_CURRENT_SOURCE_DIR PARENT_PATH COMMON_LIBRARY_INCLUDE_DIR)
|
||||
|
||||
|
||||
@@ -0,0 +1,225 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
#define ROCPROFILER_DEFINE_NAME_TRAIT(NAME, DESC, ...) \
|
||||
namespace rocprofiler \
|
||||
{ \
|
||||
template <> \
|
||||
struct perfetto_category<__VA_ARGS__> \
|
||||
{ \
|
||||
static constexpr auto value = NAME; \
|
||||
static constexpr auto description = DESC; \
|
||||
}; \
|
||||
}
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
template <typename Tp>
|
||||
struct perfetto_category;
|
||||
|
||||
namespace trait
|
||||
{
|
||||
template <typename... Tp>
|
||||
using name = perfetto_category<Tp...>;
|
||||
}
|
||||
} // namespace rocprofiler
|
||||
|
||||
#define ROCPROFILER_DEFINE_NS_API(NS, NAME) \
|
||||
namespace rocprofiler \
|
||||
{ \
|
||||
namespace NS \
|
||||
{ \
|
||||
struct NAME; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define ROCPROFILER_DEFINE_CATEGORY(NS, VALUE, DESC) \
|
||||
ROCPROFILER_DEFINE_NS_API(NS, VALUE) \
|
||||
ROCPROFILER_DEFINE_NAME_TRAIT(#VALUE, DESC, NS::VALUE)
|
||||
|
||||
ROCPROFILER_DEFINE_CATEGORY(category, hsa_api, "HSA API function")
|
||||
ROCPROFILER_DEFINE_CATEGORY(category, hip_api, "HIP API function")
|
||||
ROCPROFILER_DEFINE_CATEGORY(category, marker_api, "Marker API region")
|
||||
ROCPROFILER_DEFINE_CATEGORY(category, kernel_dispatch, "GPU kernel dispatch")
|
||||
ROCPROFILER_DEFINE_CATEGORY(category, memory_copy, "Async memory copy")
|
||||
|
||||
#define ROCPROFILER_PERFETTO_CATEGORY(TYPE) \
|
||||
::perfetto::Category(::rocprofiler::perfetto_category<::rocprofiler::TYPE>::value) \
|
||||
.SetDescription(::rocprofiler::perfetto_category<::rocprofiler::TYPE>::description)
|
||||
|
||||
#define ROCPROFILER_PERFETTO_CATEGORIES \
|
||||
ROCPROFILER_PERFETTO_CATEGORY(category::hsa_api), \
|
||||
ROCPROFILER_PERFETTO_CATEGORY(category::hip_api), \
|
||||
ROCPROFILER_PERFETTO_CATEGORY(category::marker_api), \
|
||||
ROCPROFILER_PERFETTO_CATEGORY(category::kernel_dispatch), \
|
||||
ROCPROFILER_PERFETTO_CATEGORY(category::memory_copy)
|
||||
|
||||
#include <perfetto.h>
|
||||
|
||||
PERFETTO_DEFINE_CATEGORIES(ROCPROFILER_PERFETTO_CATEGORIES);
|
||||
|
||||
namespace concepts
|
||||
{
|
||||
template <typename Tp>
|
||||
struct is_string_type : std::false_type
|
||||
{};
|
||||
|
||||
template <>
|
||||
struct is_string_type<std::string> : std::true_type
|
||||
{};
|
||||
|
||||
template <>
|
||||
struct is_string_type<char*> : std::true_type
|
||||
{};
|
||||
|
||||
template <>
|
||||
struct is_string_type<const char*> : std::true_type
|
||||
{};
|
||||
|
||||
template <>
|
||||
struct is_string_type<std::string_view> : std::true_type
|
||||
{};
|
||||
|
||||
template <typename Tp>
|
||||
struct is_string_type<const Tp> : is_string_type<std::decay_t<Tp>>
|
||||
{};
|
||||
|
||||
template <typename Tp>
|
||||
struct is_string_type<Tp&> : is_string_type<std::decay_t<Tp>>
|
||||
{};
|
||||
|
||||
template <typename Tp>
|
||||
struct is_string_type<volatile Tp> : is_string_type<std::decay_t<Tp>>
|
||||
{};
|
||||
|
||||
template <typename Tp, size_t N>
|
||||
struct is_string_type<Tp[N]> : is_string_type<std::decay_t<Tp[N]>>
|
||||
{};
|
||||
|
||||
template <typename Tp, size_t N>
|
||||
struct is_string_type<const Tp[N]> : is_string_type<std::decay_t<Tp[N]>>
|
||||
{};
|
||||
|
||||
template <typename Tp, size_t N>
|
||||
struct is_string_type<volatile Tp[N]> : is_string_type<std::decay_t<Tp[N]>>
|
||||
{};
|
||||
|
||||
template <typename Tp>
|
||||
struct unqualified_type
|
||||
{
|
||||
using type = std::remove_reference_t<std::remove_cv_t<std::decay_t<Tp>>>;
|
||||
};
|
||||
|
||||
template <typename Tp>
|
||||
using unqualified_type_t = typename unqualified_type<Tp>::type;
|
||||
|
||||
template <typename Tp>
|
||||
struct can_stringify
|
||||
{
|
||||
private:
|
||||
static constexpr auto sfinae(int)
|
||||
-> decltype(std::declval<std::ostream&>() << std::declval<Tp>(), bool())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static constexpr auto sfinae(long) { return false; }
|
||||
|
||||
public:
|
||||
static constexpr bool value = sfinae(0);
|
||||
constexpr auto operator()() const { return sfinae(0); }
|
||||
};
|
||||
} // namespace concepts
|
||||
|
||||
using perfetto_event_context_t = ::perfetto::EventContext;
|
||||
|
||||
template <typename Np, typename Tp>
|
||||
auto
|
||||
add_perfetto_annotation(perfetto_event_context_t& ctx, Np&& _name, Tp&& _val)
|
||||
{
|
||||
using named_type = concepts::unqualified_type_t<Np>;
|
||||
using value_type = concepts::unqualified_type_t<Tp>;
|
||||
|
||||
static_assert(concepts::is_string_type<named_type>::value, "Error! name is not a string type");
|
||||
|
||||
auto _get_dbg = [&]() {
|
||||
auto* _dbg = ctx.event()->add_debug_annotations();
|
||||
_dbg->set_name(std::string_view{std::forward<Np>(_name)}.data());
|
||||
return _dbg;
|
||||
};
|
||||
|
||||
if constexpr(std::is_same<value_type, std::string_view>::value)
|
||||
{
|
||||
_get_dbg()->set_string_value(_val.data());
|
||||
}
|
||||
else if constexpr(concepts::is_string_type<value_type>::value)
|
||||
{
|
||||
_get_dbg()->set_string_value(std::forward<Tp>(_val));
|
||||
}
|
||||
else if constexpr(std::is_same<value_type, bool>::value)
|
||||
{
|
||||
_get_dbg()->set_bool_value(_val);
|
||||
}
|
||||
else if constexpr(std::is_enum<value_type>::value)
|
||||
{
|
||||
_get_dbg()->set_int_value(static_cast<int64_t>(_val));
|
||||
}
|
||||
else if constexpr(std::is_floating_point<value_type>::value)
|
||||
{
|
||||
_get_dbg()->set_double_value(static_cast<double>(_val));
|
||||
}
|
||||
else if constexpr(std::is_integral<value_type>::value)
|
||||
{
|
||||
if constexpr(std::is_unsigned<value_type>::value)
|
||||
{
|
||||
_get_dbg()->set_uint_value(_val);
|
||||
}
|
||||
else
|
||||
{
|
||||
_get_dbg()->set_int_value(_val);
|
||||
}
|
||||
}
|
||||
else if constexpr(std::is_pointer<value_type>::value)
|
||||
{
|
||||
_get_dbg()->set_pointer_value(reinterpret_cast<uint64_t>(_val));
|
||||
}
|
||||
else if constexpr(concepts::can_stringify<value_type>::value)
|
||||
{
|
||||
auto _ss = std::stringstream{};
|
||||
_ss << std::forward<Tp>(_val);
|
||||
_get_dbg()->set_string_value(_ss.str());
|
||||
}
|
||||
else
|
||||
{
|
||||
static_assert(std::is_empty<value_type>::value, "Error! unsupported data type");
|
||||
}
|
||||
}
|
||||
@@ -30,7 +30,9 @@
|
||||
#include <rocprofiler-sdk/internal_threading.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include <cereal/archives/binary.hpp>
|
||||
#include <cereal/archives/json.hpp>
|
||||
#include <cereal/archives/portable_binary.hpp>
|
||||
#include <cereal/cereal.hpp>
|
||||
#include <cereal/types/array.hpp>
|
||||
#include <cereal/types/atomic.hpp>
|
||||
@@ -286,6 +288,16 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_memory_copy_record_t data)
|
||||
SAVE_DATA_FIELD(src_agent_id);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_buffer_tracing_correlation_id_retirement_record_t data)
|
||||
{
|
||||
SAVE_DATA_FIELD(size);
|
||||
SAVE_DATA_FIELD(kind);
|
||||
SAVE_DATA_FIELD(timestamp);
|
||||
SAVE_DATA_FIELD(internal_correlation_id);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, HsaCacheType data)
|
||||
|
||||
@@ -44,6 +44,7 @@ def test_data_structure(input_data):
|
||||
node_exists("hsa_api_traces", sdk_data["buffer_records"])
|
||||
node_exists("hip_api_traces", sdk_data["buffer_records"], 0)
|
||||
node_exists("marker_api_traces", sdk_data["buffer_records"])
|
||||
node_exists("retired_correlation_ids", sdk_data["buffer_records"])
|
||||
|
||||
|
||||
def test_timestamps(input_data):
|
||||
@@ -123,6 +124,50 @@ def test_internal_correlation_ids(input_data):
|
||||
assert max(api_corr_ids_sorted) == len_corr_id_unq
|
||||
|
||||
|
||||
def test_retired_correlation_ids(input_data):
|
||||
data = input_data
|
||||
sdk_data = data["rocprofiler-sdk-json-tool"]
|
||||
|
||||
def _sort_dict(inp):
|
||||
return dict(sorted(inp.items()))
|
||||
|
||||
api_corr_ids = {}
|
||||
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
corr_id = itr["correlation_id"]["internal"]
|
||||
assert corr_id not in api_corr_ids.keys()
|
||||
api_corr_ids[corr_id] = itr
|
||||
|
||||
async_corr_ids = {}
|
||||
for titr in ["kernel_dispatches", "memory_copies"]:
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
corr_id = itr["correlation_id"]["internal"]
|
||||
assert corr_id not in async_corr_ids.keys()
|
||||
async_corr_ids[corr_id] = itr
|
||||
|
||||
retired_corr_ids = {}
|
||||
for itr in sdk_data["buffer_records"]["retired_correlation_ids"]:
|
||||
corr_id = itr["internal_correlation_id"]
|
||||
assert corr_id not in retired_corr_ids.keys()
|
||||
retired_corr_ids[corr_id] = itr
|
||||
|
||||
api_corr_ids = _sort_dict(api_corr_ids)
|
||||
async_corr_ids = _sort_dict(async_corr_ids)
|
||||
retired_corr_ids = _sort_dict(retired_corr_ids)
|
||||
|
||||
for cid, itr in async_corr_ids.items():
|
||||
assert cid in retired_corr_ids.keys()
|
||||
ts = retired_corr_ids[cid]["timestamp"]
|
||||
assert ts > itr["end_timestamp"]
|
||||
|
||||
for cid, itr in api_corr_ids.items():
|
||||
assert cid in retired_corr_ids.keys()
|
||||
ts = retired_corr_ids[cid]["timestamp"]
|
||||
assert ts > itr["end_timestamp"]
|
||||
|
||||
assert len(api_corr_ids.keys()) == (len(retired_corr_ids.keys()))
|
||||
|
||||
|
||||
def test_external_correlation_ids(input_data):
|
||||
data = input_data
|
||||
sdk_data = data["rocprofiler-sdk-json-tool"]
|
||||
|
||||
@@ -23,7 +23,8 @@ def test_hsa_api_trace(hsa_input_data):
|
||||
correlation_ids = sorted(list(set(correlation_ids)))
|
||||
|
||||
hsa_api_calls_offset = 2 # roctxRangePush is first
|
||||
num_marker_api_calls = 6 # six marker API calls
|
||||
num_marker_api_calls = 6 # seven marker API calls, only six entries in
|
||||
# marker csv data because roctxRangePush + roctxRangePop is one entry
|
||||
|
||||
# all correlation ids are unique
|
||||
assert len(correlation_ids) == len(hsa_input_data)
|
||||
|
||||
@@ -15,7 +15,7 @@ target_sources(rocprofiler-sdk-json-tool PRIVATE json-tool.cpp)
|
||||
target_link_libraries(
|
||||
rocprofiler-sdk-json-tool
|
||||
PRIVATE rocprofiler::rocprofiler rocprofiler::cereal rocprofiler::tests-build-flags
|
||||
rocprofiler::tests-common-library)
|
||||
rocprofiler::tests-common-library rocprofiler::tests-perfetto)
|
||||
set_target_properties(
|
||||
rocprofiler-sdk-json-tool
|
||||
PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib/rocprofiler-sdk"
|
||||
|
||||
@@ -33,9 +33,11 @@
|
||||
|
||||
#include "common/defines.hpp"
|
||||
#include "common/filesystem.hpp"
|
||||
#include "common/perfetto.hpp"
|
||||
#include "common/serialization.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/buffer.h>
|
||||
#include <rocprofiler-sdk/buffer_tracing.h>
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
#include <rocprofiler-sdk/external_correlation.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
@@ -44,6 +46,7 @@
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
@@ -61,6 +64,7 @@
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <thread>
|
||||
#include <type_traits>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
@@ -68,6 +72,101 @@ namespace client
|
||||
{
|
||||
namespace
|
||||
{
|
||||
template <typename Tp>
|
||||
size_t
|
||||
get_hash_id(Tp&& _val)
|
||||
{
|
||||
if constexpr(!std::is_pointer<Tp>::value)
|
||||
return std::hash<Tp>{}(std::forward<Tp>(_val));
|
||||
else if constexpr(std::is_same<Tp, const char*>::value)
|
||||
return get_hash_id(std::string_view{_val});
|
||||
else
|
||||
return get_hash_id(*_val);
|
||||
}
|
||||
|
||||
std::string
|
||||
demangle(std::string_view _mangled_name, int& _status)
|
||||
{
|
||||
constexpr size_t buffer_len = 4096;
|
||||
// return the mangled since there is no buffer
|
||||
if(_mangled_name.empty())
|
||||
{
|
||||
_status = -2;
|
||||
return std::string{};
|
||||
}
|
||||
|
||||
auto _demangled_name = std::string{_mangled_name};
|
||||
|
||||
// PARAMETERS to __cxa_demangle
|
||||
// mangled_name:
|
||||
// A NULL-terminated character string containing the name to be demangled.
|
||||
// buffer:
|
||||
// A region of memory, allocated with malloc, of *length bytes, into which the
|
||||
// demangled name is stored. If output_buffer is not long enough, it is expanded
|
||||
// using realloc. output_buffer may instead be NULL; in that case, the demangled
|
||||
// name is placed in a region of memory allocated with malloc.
|
||||
// _buflen:
|
||||
// If length is non-NULL, the length of the buffer containing the demangled name
|
||||
// is placed in *length.
|
||||
// status:
|
||||
// *status is set to one of the following values
|
||||
size_t _demang_len = 0;
|
||||
char* _demang = abi::__cxa_demangle(_demangled_name.c_str(), nullptr, &_demang_len, &_status);
|
||||
switch(_status)
|
||||
{
|
||||
// 0 : The demangling operation succeeded.
|
||||
// -1 : A memory allocation failure occurred.
|
||||
// -2 : mangled_name is not a valid name under the C++ ABI mangling rules.
|
||||
// -3 : One of the arguments is invalid.
|
||||
case 0:
|
||||
{
|
||||
if(_demang) _demangled_name = std::string{_demang};
|
||||
break;
|
||||
}
|
||||
case -1:
|
||||
{
|
||||
char _msg[buffer_len];
|
||||
::memset(_msg, '\0', buffer_len * sizeof(char));
|
||||
::snprintf(_msg,
|
||||
buffer_len,
|
||||
"memory allocation failure occurred demangling %s",
|
||||
_demangled_name.c_str());
|
||||
::perror(_msg);
|
||||
break;
|
||||
}
|
||||
case -2: break;
|
||||
case -3:
|
||||
{
|
||||
char _msg[buffer_len];
|
||||
::memset(_msg, '\0', buffer_len * sizeof(char));
|
||||
::snprintf(_msg,
|
||||
buffer_len,
|
||||
"Invalid argument in: (\"%s\", nullptr, nullptr, %p)",
|
||||
_demangled_name.c_str(),
|
||||
(void*) &_status);
|
||||
::perror(_msg);
|
||||
break;
|
||||
}
|
||||
default: break;
|
||||
};
|
||||
|
||||
// if it "demangled" but the length is zero, set the status to -2
|
||||
if(_demang_len == 0 && _status == 0) _status = -2;
|
||||
|
||||
// free allocated buffer
|
||||
::free(_demang);
|
||||
return _demangled_name;
|
||||
}
|
||||
|
||||
std::string
|
||||
demangle(std::string_view symbol)
|
||||
{
|
||||
int _status = 0;
|
||||
auto demangled_str = demangle(symbol, _status);
|
||||
if(_status == 0) return demangled_str;
|
||||
return std::string{symbol};
|
||||
}
|
||||
|
||||
struct source_location
|
||||
{
|
||||
std::string function = {};
|
||||
@@ -262,11 +361,19 @@ template <typename ArchiveT>
|
||||
void
|
||||
serialize_args(ArchiveT& ar, const callback_arg_array_t& data)
|
||||
{
|
||||
ar.setNextName("args");
|
||||
ar.startNode();
|
||||
for(const auto& itr : data)
|
||||
ar(cereal::make_nvp(itr.first, itr.second));
|
||||
ar.finishNode();
|
||||
if constexpr(std::is_same<ArchiveT, cereal::BinaryOutputArchive>::value ||
|
||||
std::is_same<ArchiveT, cereal::PortableBinaryOutputArchive>::value)
|
||||
{
|
||||
ar(cereal::make_nvp("args", data));
|
||||
}
|
||||
else
|
||||
{
|
||||
ar.setNextName("args");
|
||||
ar.startNode();
|
||||
for(const auto& itr : data)
|
||||
ar(cereal::make_nvp(itr.first, itr.second));
|
||||
ar.finishNode();
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
@@ -352,6 +459,7 @@ struct marker_api_callback_record_t
|
||||
uint64_t timestamp = 0;
|
||||
rocprofiler_callback_tracing_record_t record = {};
|
||||
rocprofiler_callback_tracing_marker_api_data_t payload = {};
|
||||
callback_arg_array_t args = {};
|
||||
|
||||
template <typename ArchiveT>
|
||||
void save(ArchiveT& ar) const
|
||||
@@ -359,6 +467,7 @@ struct marker_api_callback_record_t
|
||||
ar(cereal::make_nvp("timestamp", timestamp));
|
||||
ar(cereal::make_nvp("record", record));
|
||||
ar(cereal::make_nvp("payload", payload));
|
||||
serialize_args(ar, args);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -387,6 +496,7 @@ counter_collection_buffered(rocprofiler_context_id_t, /*context*/
|
||||
else if(headers == nullptr)
|
||||
throw std::runtime_error{"rocprofiler invoked a buffer callback with a null pointer to the "
|
||||
"array of headers. this should never happen"};
|
||||
|
||||
for(size_t i = 0; i < num_headers; ++i)
|
||||
{
|
||||
auto* header = headers[i];
|
||||
@@ -487,9 +597,7 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t* /*user_data*/,
|
||||
void* /*callback_data*/)
|
||||
{
|
||||
static auto _mutex = std::mutex{};
|
||||
auto _lk = std::unique_lock<std::mutex>{_mutex};
|
||||
auto ts = rocprofiler_timestamp_t{};
|
||||
auto ts = rocprofiler_timestamp_t{};
|
||||
ROCPROFILER_CALL(rocprofiler_get_timestamp(&ts), "get timestamp");
|
||||
|
||||
static thread_local auto _once = std::once_flag{};
|
||||
@@ -505,12 +613,18 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
{
|
||||
auto data_v =
|
||||
*static_cast<rocprofiler_callback_tracing_code_object_load_data_t*>(record.payload);
|
||||
|
||||
static auto _mutex = std::mutex{};
|
||||
auto _lk = std::unique_lock<std::mutex>{_mutex};
|
||||
code_object_records.emplace_back(code_object_callback_record_t{ts, record, data_v});
|
||||
}
|
||||
else if(record.operation ==
|
||||
ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT_DEVICE_KERNEL_SYMBOL_REGISTER)
|
||||
{
|
||||
auto data_v = *static_cast<kernel_symbol_data_t*>(record.payload);
|
||||
|
||||
static auto _mutex = std::mutex{};
|
||||
auto _lk = std::unique_lock<std::mutex>{_mutex};
|
||||
kernel_symbol_records.emplace_back(kernel_symbol_callback_record_t{ts, record, data_v});
|
||||
}
|
||||
}
|
||||
@@ -521,7 +635,11 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
{
|
||||
auto* data = static_cast<rocprofiler_callback_tracing_hsa_api_data_t*>(record.payload);
|
||||
auto args = callback_arg_array_t{};
|
||||
rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, &args);
|
||||
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
|
||||
rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, &args);
|
||||
|
||||
static auto _mutex = std::mutex{};
|
||||
auto _lk = std::unique_lock<std::mutex>{_mutex};
|
||||
hsa_api_cb_records.emplace_back(
|
||||
hsa_api_callback_record_t{ts, record, *data, std::move(args)});
|
||||
}
|
||||
@@ -530,7 +648,11 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
{
|
||||
auto* data = static_cast<rocprofiler_callback_tracing_hip_api_data_t*>(record.payload);
|
||||
auto args = callback_arg_array_t{};
|
||||
rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, &args);
|
||||
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
|
||||
rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, &args);
|
||||
|
||||
static auto _mutex = std::mutex{};
|
||||
auto _lk = std::unique_lock<std::mutex>{_mutex};
|
||||
hip_api_cb_records.emplace_back(
|
||||
hip_api_callback_record_t{ts, record, *data, std::move(args)});
|
||||
}
|
||||
@@ -539,7 +661,14 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API)
|
||||
{
|
||||
auto* data = static_cast<rocprofiler_callback_tracing_marker_api_data_t*>(record.payload);
|
||||
marker_api_cb_records.emplace_back(marker_api_callback_record_t{ts, record, *data});
|
||||
auto args = callback_arg_array_t{};
|
||||
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
|
||||
rocprofiler_iterate_callback_tracing_kind_operation_args(record, save_args, &args);
|
||||
|
||||
static auto _mutex = std::mutex{};
|
||||
auto _lk = std::unique_lock<std::mutex>{_mutex};
|
||||
marker_api_cb_records.emplace_back(
|
||||
marker_api_callback_record_t{ts, record, *data, std::move(args)});
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -552,6 +681,8 @@ auto marker_api_bf_records = std::deque<rocprofiler_buffer_tracing_marker_api_
|
||||
auto hip_api_bf_records = std::deque<rocprofiler_buffer_tracing_hip_api_record_t>{};
|
||||
auto kernel_dispatch_records = std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>{};
|
||||
auto memory_copy_records = std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>{};
|
||||
auto corr_id_retire_records =
|
||||
std::deque<rocprofiler_buffer_tracing_correlation_id_retirement_record_t>{};
|
||||
|
||||
void
|
||||
tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
|
||||
@@ -634,6 +765,14 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
|
||||
|
||||
memory_copy_records.emplace_back(*record);
|
||||
}
|
||||
else if(header->kind == ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT)
|
||||
{
|
||||
auto* record =
|
||||
static_cast<rocprofiler_buffer_tracing_correlation_id_retirement_record_t*>(
|
||||
header->payload);
|
||||
|
||||
corr_id_retire_records.emplace_back(*record);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error{
|
||||
@@ -697,6 +836,7 @@ rocprofiler_context_id_t marker_api_buffered_ctx = {};
|
||||
rocprofiler_context_id_t kernel_dispatch_ctx = {};
|
||||
rocprofiler_context_id_t memory_copy_ctx = {};
|
||||
rocprofiler_context_id_t counter_collection_ctx = {};
|
||||
rocprofiler_context_id_t corr_id_retire_ctx = {};
|
||||
// buffers
|
||||
rocprofiler_buffer_id_t hsa_api_buffered_buffer = {};
|
||||
rocprofiler_buffer_id_t hip_api_buffered_buffer = {};
|
||||
@@ -704,6 +844,7 @@ rocprofiler_buffer_id_t marker_api_buffered_buffer = {};
|
||||
rocprofiler_buffer_id_t kernel_dispatch_buffer = {};
|
||||
rocprofiler_buffer_id_t memory_copy_buffer = {};
|
||||
rocprofiler_buffer_id_t counter_collection_buffer = {};
|
||||
rocprofiler_buffer_id_t corr_id_retire_buffer = {};
|
||||
|
||||
auto contexts = std::unordered_map<std::string_view, rocprofiler_context_id_t*>{
|
||||
{"HSA_API_CALLBACK", &hsa_api_callback_ctx},
|
||||
@@ -715,14 +856,19 @@ auto contexts = std::unordered_map<std::string_view, rocprofiler_context_id_t*>{
|
||||
{"MARKER_API_BUFFERED", &marker_api_buffered_ctx},
|
||||
{"KERNEL_DISPATCH", &kernel_dispatch_ctx},
|
||||
{"MEMORY_COPY", &memory_copy_ctx},
|
||||
{"COUNTER_COLLECTION", &counter_collection_ctx}};
|
||||
{"COUNTER_COLLECTION", &counter_collection_ctx},
|
||||
{"CORRELATION_ID_RETIREMENT", &corr_id_retire_ctx},
|
||||
};
|
||||
|
||||
auto buffers = std::array<rocprofiler_buffer_id_t*, 6>{&hsa_api_buffered_buffer,
|
||||
&hip_api_buffered_buffer,
|
||||
&marker_api_buffered_buffer,
|
||||
&kernel_dispatch_buffer,
|
||||
&memory_copy_buffer,
|
||||
&counter_collection_buffer};
|
||||
auto buffers = std::array<rocprofiler_buffer_id_t*, 7>{
|
||||
&hsa_api_buffered_buffer,
|
||||
&hip_api_buffered_buffer,
|
||||
&marker_api_buffered_buffer,
|
||||
&kernel_dispatch_buffer,
|
||||
&memory_copy_buffer,
|
||||
&counter_collection_buffer,
|
||||
&corr_id_retire_buffer,
|
||||
};
|
||||
|
||||
auto agents = std::vector<rocprofiler_agent_t>{};
|
||||
|
||||
@@ -866,6 +1012,24 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
&memory_copy_buffer),
|
||||
"buffer creation");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(corr_id_retire_ctx,
|
||||
buffer_size,
|
||||
watermark,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
tool_tracing_buffered,
|
||||
tool_data,
|
||||
&corr_id_retire_buffer),
|
||||
"buffer creation");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(counter_collection_ctx,
|
||||
buffer_size,
|
||||
watermark,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
counter_collection_buffered,
|
||||
nullptr,
|
||||
&counter_collection_buffer),
|
||||
"buffer creation");
|
||||
|
||||
for(auto itr : {ROCPROFILER_BUFFER_TRACING_HSA_CORE_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API,
|
||||
@@ -908,20 +1072,6 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
marker_api_buffered_buffer),
|
||||
"buffer tracing service configure");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(counter_collection_ctx,
|
||||
4096,
|
||||
2048,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
counter_collection_buffered,
|
||||
nullptr,
|
||||
&counter_collection_buffer),
|
||||
"buffer creation");
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_buffered_dispatch_profile_counting_service(
|
||||
counter_collection_ctx, counter_collection_buffer, dispatch_callback, nullptr),
|
||||
"setup buffered service");
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_buffer_tracing_service(kernel_dispatch_ctx,
|
||||
ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH,
|
||||
@@ -938,12 +1088,27 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
memory_copy_buffer),
|
||||
"buffer tracing service for memory copy configure");
|
||||
|
||||
auto client_thread = rocprofiler_callback_thread_t{};
|
||||
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_thread),
|
||||
"creating callback thread");
|
||||
ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service(
|
||||
corr_id_retire_ctx,
|
||||
ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT,
|
||||
nullptr,
|
||||
0,
|
||||
corr_id_retire_buffer),
|
||||
"buffer tracing service for memory copy configure");
|
||||
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_buffered_dispatch_profile_counting_service(
|
||||
counter_collection_ctx, counter_collection_buffer, dispatch_callback, nullptr),
|
||||
"setup buffered service");
|
||||
|
||||
for(auto* itr : buffers)
|
||||
{
|
||||
if(itr->handle == 0) continue;
|
||||
|
||||
auto client_thread = rocprofiler_callback_thread_t{};
|
||||
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_thread),
|
||||
"creating callback thread");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_assign_callback_thread(*itr, client_thread),
|
||||
"assignment of thread for buffer");
|
||||
}
|
||||
@@ -996,6 +1161,37 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
}
|
||||
}
|
||||
|
||||
auto* context_settings_excl_env = getenv("ROCPROFILER_TOOL_CONTEXTS_EXCLUDE");
|
||||
if(context_settings_excl_env != nullptr && !std::string_view{context_settings_excl_env}.empty())
|
||||
{
|
||||
auto context_settings = std::string{context_settings_excl_env};
|
||||
|
||||
// ignore case
|
||||
for(auto& itr : context_settings)
|
||||
itr = toupper(itr);
|
||||
|
||||
// if context is not in string, set the pointer to null in the contexts array
|
||||
auto options = std::stringstream{};
|
||||
for(auto& itr : contexts)
|
||||
{
|
||||
options << "\n\t- " << itr.first;
|
||||
auto pos = context_settings.find(itr.first);
|
||||
if(pos != std::string::npos) itr.second = nullptr;
|
||||
}
|
||||
|
||||
// detect if there are any invalid entries
|
||||
if(context_settings.find_first_not_of(" ,;:\t\n\r") != std::string::npos)
|
||||
{
|
||||
auto filename = std::string_view{__FILE__};
|
||||
auto msg = std::stringstream{};
|
||||
msg << "[rocprofiler-sdk-json-tool][" << filename.substr(filename.find_last_of('/') + 1)
|
||||
<< ":" << __LINE__
|
||||
<< "] invalid specification of ROCPROFILER_TOOL_CONTEXTS_EXCLUDE ('"
|
||||
<< context_settings_excl_env << "'). Valid choices are: " << options.str();
|
||||
throw std::runtime_error{msg.str()};
|
||||
}
|
||||
}
|
||||
|
||||
start();
|
||||
|
||||
// no errors
|
||||
@@ -1005,6 +1201,9 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
void
|
||||
write_json(call_stack_t* _call_stack);
|
||||
|
||||
void
|
||||
write_perfetto();
|
||||
|
||||
void
|
||||
tool_fini(void* tool_data)
|
||||
{
|
||||
@@ -1028,6 +1227,7 @@ tool_fini(void* tool_data)
|
||||
<< ", hsa_api_bf_records=" << hsa_api_bf_records.size()
|
||||
<< ", hip_api_bf_records=" << hip_api_bf_records.size()
|
||||
<< ", marker_api_bf_records=" << marker_api_bf_records.size()
|
||||
<< ", corr_id_retire_records=" << corr_id_retire_records.size()
|
||||
<< ", counter_collection_records" << counter_collection_bf_records.size() << "...\n"
|
||||
<< std::flush;
|
||||
|
||||
@@ -1038,6 +1238,7 @@ tool_fini(void* tool_data)
|
||||
}
|
||||
|
||||
write_json(_call_stack);
|
||||
write_perfetto();
|
||||
|
||||
std::cerr << "[" << getpid() << "][" << __FUNCTION__ << "] Finalization complete.\n"
|
||||
<< std::flush;
|
||||
@@ -1128,6 +1329,7 @@ write_json(call_stack_t* _call_stack)
|
||||
json_ar(cereal::make_nvp("hsa_api_traces", hsa_api_bf_records));
|
||||
json_ar(cereal::make_nvp("hip_api_traces", hip_api_bf_records));
|
||||
json_ar(cereal::make_nvp("marker_api_traces", marker_api_bf_records));
|
||||
json_ar(cereal::make_nvp("retired_correlation_ids", corr_id_retire_records));
|
||||
json_ar(cereal::make_nvp("counter_collection", counter_collection_bf_records));
|
||||
} catch(std::exception& e)
|
||||
{
|
||||
@@ -1145,6 +1347,335 @@ write_json(call_stack_t* _call_stack)
|
||||
if(cleanup) cleanup(ofs);
|
||||
}
|
||||
|
||||
void
|
||||
write_perfetto()
|
||||
{
|
||||
auto args = ::perfetto::TracingInitArgs{};
|
||||
auto track_event_cfg = ::perfetto::protos::gen::TrackEventConfig{};
|
||||
auto cfg = ::perfetto::TraceConfig{};
|
||||
|
||||
// environment settings
|
||||
auto shmem_size_hint = size_t{64};
|
||||
auto buffer_size_kb = size_t{1024000};
|
||||
|
||||
auto* buffer_config = cfg.add_buffers();
|
||||
buffer_config->set_size_kb(buffer_size_kb);
|
||||
buffer_config->set_fill_policy(
|
||||
::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD);
|
||||
|
||||
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("track_event"); // this MUST be track_event
|
||||
ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
|
||||
|
||||
args.shmem_size_hint_kb = shmem_size_hint;
|
||||
args.backends |= ::perfetto::kInProcessBackend;
|
||||
|
||||
::perfetto::Tracing::Initialize(args);
|
||||
::perfetto::TrackEvent::Register();
|
||||
|
||||
auto tracing_session = ::perfetto::Tracing::NewTrace();
|
||||
|
||||
tracing_session->Setup(cfg);
|
||||
tracing_session->StartBlocking();
|
||||
|
||||
auto tids = std::set<rocprofiler_thread_id_t>{};
|
||||
auto agent_ids = std::set<uint64_t>{};
|
||||
auto agent_queue_ids = std::map<uint64_t, std::set<uint64_t>>{};
|
||||
|
||||
auto _get_agent = [](uint64_t id_handle) -> const rocprofiler_agent_t* {
|
||||
for(const auto& itr : agents)
|
||||
{
|
||||
if(id_handle == itr.id.handle) return &itr;
|
||||
}
|
||||
return nullptr;
|
||||
};
|
||||
|
||||
{
|
||||
for(auto itr : hsa_api_bf_records)
|
||||
tids.emplace(itr.thread_id);
|
||||
for(auto itr : hip_api_bf_records)
|
||||
tids.emplace(itr.thread_id);
|
||||
for(auto itr : marker_api_bf_records)
|
||||
tids.emplace(itr.thread_id);
|
||||
|
||||
for(auto itr : memory_copy_records)
|
||||
{
|
||||
agent_ids.emplace(itr.dst_agent_id.handle);
|
||||
agent_ids.emplace(itr.src_agent_id.handle);
|
||||
}
|
||||
|
||||
for(auto itr : kernel_dispatch_records)
|
||||
agent_queue_ids[itr.agent_id.handle].emplace(itr.queue_id.handle);
|
||||
}
|
||||
|
||||
auto thread_tracks = std::unordered_map<rocprofiler_thread_id_t, ::perfetto::Track>{};
|
||||
|
||||
uint64_t nthrn = 0;
|
||||
for(auto itr : tids)
|
||||
{
|
||||
if(itr == main_tid)
|
||||
thread_tracks.emplace(main_tid, ::perfetto::ThreadTrack::Current());
|
||||
else
|
||||
{
|
||||
auto _track = ::perfetto::Track{itr};
|
||||
auto _desc = _track.Serialize();
|
||||
auto _namess = std::stringstream{};
|
||||
_namess << "Thread " << ++nthrn << " (" << itr << ")";
|
||||
_desc.set_name(_namess.str());
|
||||
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
||||
|
||||
thread_tracks.emplace(itr, _track);
|
||||
}
|
||||
}
|
||||
|
||||
auto agent_tracks = std::unordered_map<uint64_t, ::perfetto::Track>{};
|
||||
|
||||
for(auto itr : agent_ids)
|
||||
{
|
||||
const auto* _agent = _get_agent(itr);
|
||||
if(!_agent) throw std::runtime_error{"agent lookup error"};
|
||||
|
||||
auto _namess = std::stringstream{};
|
||||
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_namess << "CPU COPY [" << itr << "] ";
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_namess << "GPU COPY [" << itr << "] ";
|
||||
|
||||
if(!std::string_view{_agent->model_name}.empty())
|
||||
_namess << _agent->model_name;
|
||||
else
|
||||
_namess << _agent->product_name;
|
||||
|
||||
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
|
||||
auto _desc = _track.Serialize();
|
||||
_desc.set_name(_namess.str());
|
||||
|
||||
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
||||
|
||||
agent_tracks.emplace(itr, _track);
|
||||
}
|
||||
|
||||
auto agent_queue_tracks =
|
||||
std::unordered_map<uint64_t, std::unordered_map<uint64_t, ::perfetto::Track>>{};
|
||||
|
||||
for(const auto& aitr : agent_queue_ids)
|
||||
{
|
||||
uint32_t nqueue = 0;
|
||||
for(auto qitr : aitr.second)
|
||||
{
|
||||
const auto* _agent = _get_agent(aitr.first);
|
||||
if(!_agent) throw std::runtime_error{"agent lookup error"};
|
||||
|
||||
auto _namess = std::stringstream{};
|
||||
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_namess << "CPU COMPUTE [" << aitr.first << "] ";
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_namess << "GPU COMPUTE [" << aitr.first << "] ";
|
||||
|
||||
_namess << " Queue [" << nqueue++ << "]";
|
||||
|
||||
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
|
||||
auto _desc = _track.Serialize();
|
||||
_desc.set_name(_namess.str());
|
||||
|
||||
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
||||
|
||||
agent_queue_tracks[aitr.first].emplace(qitr, _track);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto buffer_name_info = get_buffer_tracing_names();
|
||||
auto callbk_name_info = get_callback_tracing_names();
|
||||
|
||||
for(auto itr : hsa_api_bf_records)
|
||||
{
|
||||
auto& name = buffer_name_info.operation_names.at(itr.kind).at(itr.operation);
|
||||
auto& track = thread_tracks.at(itr.thread_id);
|
||||
|
||||
auto _args = callback_arg_array_t{};
|
||||
auto ritr = std::find_if(
|
||||
hsa_api_cb_records.begin(), hsa_api_cb_records.end(), [&itr](const auto& citr) {
|
||||
return (citr.record.correlation_id.internal == itr.correlation_id.internal);
|
||||
});
|
||||
if(ritr != hsa_api_cb_records.end()) _args = ritr->args;
|
||||
|
||||
TRACE_EVENT_BEGIN(rocprofiler::trait::name<rocprofiler::category::hsa_api>::value,
|
||||
::perfetto::StaticString(name.c_str()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"tid",
|
||||
itr.thread_id,
|
||||
"kind",
|
||||
itr.kind,
|
||||
"operation",
|
||||
itr.operation,
|
||||
"cid",
|
||||
itr.correlation_id.internal,
|
||||
[&](::perfetto::EventContext ctx) {
|
||||
for(const auto& aitr : _args)
|
||||
add_perfetto_annotation(ctx, aitr.first, aitr.second);
|
||||
});
|
||||
TRACE_EVENT_END(rocprofiler::trait::name<rocprofiler::category::hsa_api>::value,
|
||||
track,
|
||||
itr.end_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp);
|
||||
}
|
||||
|
||||
for(auto itr : hip_api_bf_records)
|
||||
{
|
||||
auto& name = buffer_name_info.operation_names.at(itr.kind).at(itr.operation);
|
||||
auto& track = thread_tracks.at(itr.thread_id);
|
||||
|
||||
auto _args = callback_arg_array_t{};
|
||||
auto ritr = std::find_if(
|
||||
hip_api_cb_records.begin(), hip_api_cb_records.end(), [&itr](const auto& citr) {
|
||||
return (citr.record.correlation_id.internal == itr.correlation_id.internal);
|
||||
});
|
||||
if(ritr != hip_api_cb_records.end()) _args = ritr->args;
|
||||
|
||||
TRACE_EVENT_BEGIN(rocprofiler::trait::name<rocprofiler::category::hip_api>::value,
|
||||
::perfetto::StaticString(name.c_str()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"tid",
|
||||
itr.thread_id,
|
||||
"kind",
|
||||
itr.kind,
|
||||
"operation",
|
||||
itr.operation,
|
||||
"cid",
|
||||
itr.correlation_id.internal,
|
||||
[&](::perfetto::EventContext ctx) {
|
||||
for(const auto& aitr : _args)
|
||||
add_perfetto_annotation(ctx, aitr.first, aitr.second);
|
||||
});
|
||||
TRACE_EVENT_END(rocprofiler::trait::name<rocprofiler::category::hip_api>::value,
|
||||
track,
|
||||
itr.end_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp);
|
||||
}
|
||||
|
||||
for(auto itr : memory_copy_records)
|
||||
{
|
||||
auto& name = buffer_name_info.operation_names.at(itr.kind).at(itr.operation);
|
||||
auto& track = agent_tracks.at(itr.dst_agent_id.handle);
|
||||
|
||||
TRACE_EVENT_BEGIN(rocprofiler::trait::name<rocprofiler::category::memory_copy>::value,
|
||||
::perfetto::StaticString(name.c_str()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"kind",
|
||||
itr.kind,
|
||||
"operation",
|
||||
itr.operation,
|
||||
"src_agent",
|
||||
itr.src_agent_id.handle,
|
||||
"dst_agent",
|
||||
itr.dst_agent_id.handle);
|
||||
TRACE_EVENT_END(rocprofiler::trait::name<rocprofiler::category::memory_copy>::value,
|
||||
track,
|
||||
itr.end_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp);
|
||||
}
|
||||
|
||||
auto demangled = std::unordered_map<std::string_view, std::string>{};
|
||||
for(auto itr : kernel_dispatch_records)
|
||||
{
|
||||
const kernel_symbol_callback_record_t* sym = nullptr;
|
||||
for(const auto& kitr : kernel_symbol_records)
|
||||
{
|
||||
if(kitr.payload.kernel_id == itr.kernel_id)
|
||||
{
|
||||
sym = &kitr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
auto name = std::string_view{sym->payload.kernel_name};
|
||||
auto& track = agent_queue_tracks.at(itr.agent_id.handle).at(itr.queue_id.handle);
|
||||
|
||||
if(demangled.find(name) == demangled.end())
|
||||
{
|
||||
demangled.emplace(name, demangle(name));
|
||||
}
|
||||
|
||||
TRACE_EVENT_BEGIN(
|
||||
rocprofiler::trait::name<rocprofiler::category::kernel_dispatch>::value,
|
||||
::perfetto::StaticString(demangled.at(name).c_str()),
|
||||
track,
|
||||
itr.start_timestamp,
|
||||
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
|
||||
"begin_ns",
|
||||
itr.start_timestamp,
|
||||
"kind",
|
||||
itr.kind,
|
||||
"agent",
|
||||
itr.agent_id.handle,
|
||||
"cid",
|
||||
itr.correlation_id.internal,
|
||||
"queue",
|
||||
itr.queue_id.handle,
|
||||
"kid",
|
||||
itr.kernel_id,
|
||||
"private_segment_size",
|
||||
itr.private_segment_size,
|
||||
"group_segment_size",
|
||||
itr.group_segment_size,
|
||||
"workgroup_size",
|
||||
itr.workgroup_size.x * itr.workgroup_size.y * itr.workgroup_size.z,
|
||||
"grid_size",
|
||||
itr.grid_size.x * itr.grid_size.y * itr.grid_size.z);
|
||||
|
||||
TRACE_EVENT_END(rocprofiler::trait::name<rocprofiler::category::kernel_dispatch>::value,
|
||||
track,
|
||||
itr.end_timestamp,
|
||||
"end_ns",
|
||||
itr.end_timestamp);
|
||||
}
|
||||
}
|
||||
|
||||
::perfetto::TrackEvent::Flush();
|
||||
tracing_session->FlushBlocking();
|
||||
tracing_session->StopBlocking();
|
||||
|
||||
using char_vec_t = std::vector<char>;
|
||||
|
||||
auto trace_data = char_vec_t{tracing_session->ReadTraceBlocking()};
|
||||
|
||||
if(!trace_data.empty())
|
||||
{
|
||||
auto ofname = std::string{"rocprofiler-tool-results.pftrace"};
|
||||
if(auto* eofname = getenv("ROCPROFILER_TOOL_OUTPUT_FILE")) ofname = eofname;
|
||||
|
||||
auto jpos = ofname.find(".json");
|
||||
if(jpos != std::string::npos) ofname = ofname.substr(0, jpos) + std::string{".pftrace"};
|
||||
|
||||
std::clog << "Writing perfetto trace file: " << ofname << std::endl;
|
||||
auto ofs = std::ofstream{ofname};
|
||||
// Write the trace into a file.
|
||||
ofs.write(trace_data.data(), trace_data.size());
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error{"no trace data"};
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
start()
|
||||
{
|
||||
@@ -1179,11 +1710,9 @@ flush()
|
||||
{
|
||||
for(auto* itr : buffers)
|
||||
{
|
||||
if(!itr) continue;
|
||||
auto status = rocprofiler_flush_buffer(*itr);
|
||||
if(status != ROCPROFILER_STATUS_ERROR_BUFFER_BUSY)
|
||||
if(itr && itr->handle > 0)
|
||||
{
|
||||
ROCPROFILER_CALL(status, "buffer flush");
|
||||
ROCPROFILER_CALL(rocprofiler_flush_buffer(*itr), "buffer flush");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1265,3 +1794,5 @@ rocprofiler_configure(uint32_t version,
|
||||
// return pointer to configure data
|
||||
return &cfg;
|
||||
}
|
||||
|
||||
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
|
||||
|
||||
Referencia en una nueva incidencia
Block a user