Update correlation id definition + status strings + const active contexts (#127)

* Update include/rocprofiler

- remove rocprofiler_external_correlation_id_t
- redefine rocprofiler_correlation_id_t to include internal id and external user data
- associate rocprofiler_push_external_correlation_id and rocprofiler_pop_external_correlation_id with a context

* Update include/rocprofiler/rocprofiler.h

- rocprofiler_get_status_name
- rocprofiler_get_status_string

* Update lib/rocprofiler/rocprofiler.cpp

- implement rocprofiler_get_status_name and rocprofiler_get_status_string

* Update lib/rocprofiler/tests/status.cpp

- unit test for status string and name

* Update lib/rocprofiler/tests/registration.cpp

- update to new rocprofiler_correlation_id_t

* Update samples

- update to new rocprofiler_correlation_id_t

* Add lib/rocprofiler/external_correlation.cpp

- placeholder for external correlation push/pop

* Update lib/rocprofiler/hsa/agent_cache.cpp

- slight tweak to when HSA_AMD_AGENT_INFO_NEAREST_CPU is defined

* Update context implementation and hsa.cpp

- get_active_contexts is array of const context pointers
- update hsa_api_impl<Idx>::functor to new rocprofiler_correlation_id_t

* Update include/rocprofiler/fwd.h

- add ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT
- reorder enum for consistency

* Update include/rocprofiler/external_correlation.h

- doxygen comments
- thread id parameter

* Update include/rocprofiler/rocprofiler.h

- add rocprofiler_get_thread_id function (needed for external corr id)

* Update lib/common/synchronized.hpp

- explicit LockedType
- define all copy/move ctor and assignment
- update rlock/wlock/ulock to support arguments and return values
- Support additional template parameter for special case of synchronized instance which is the mapped type of a synchronized map

* Update lib/rocprofiler/external_correlation.cpp

- implement rocprofiler_{push,pop}_external_correlation_id

* Update lib/rocprofiler/CMakeLists.txt

- external_correlation.hpp

* Update lib/rocprofiler/rocprofiler.cpp

- status string for ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT
- implement rocprofiler_get_thread_id

* Update lib/rocprofiler/tests (external correlation)

- add external_correlation unit tests

* Update include/rocprofiler/callback_tracing.h

- doxygen comments
- callback invoked in callback tracing has user_data pointer passed to it

* Update samples/api_callback_tracing/client.cpp

- add rocprofiler_user_data_t* to tool_tracing_callback

* Update lib/rocprofiler/tests/registration.cpp

- add rocprofiler_user_data_t* to tool_tracing_callback

* Update lib/rocprofiler/context/context.{hpp,cpp}

- update correlation_tracing_service
  - external_correlation instance
  - rename get_unique_record_id to get_unique_internal_id

* Update lib/tests/common/demangling.cpp

- tweak mangled definitions due to changing function get_unique_record_id to get_unique_internal_id

* Update lib/rocprofiler/hsa/hsa.cpp

- handle updates to external correlation id
- handle updates to callback signature in callback tracing

* Update CMakeLists.txt

- CMAKE_BUILD_TYPE=Coverage defines CODECOV=1

* Update samples/api_callback_tracing/client.cpp

[ROCm/rocprofiler-sdk commit: 6a3f79e626]
Этот коммит содержится в:
Jonathan R. Madsen
2023-10-18 13:59:41 -05:00
коммит произвёл GitHub
родитель 343f8679e7
Коммит 267954fdd3
24 изменённых файлов: 1106 добавлений и 124 удалений
+1 -1
Просмотреть файл
@@ -16,7 +16,7 @@ string(REGEX REPLACE "([0-9]+)\.([0-9]+)\.([0-9]+)(.*)" "\\1.\\2.\\3" ROCPROFILE
foreach(_LANG C CXX)
set(CMAKE_${_LANG}_FLAGS_COVERAGE_INIT
"-Og -g3 -fno-omit-frame-pointer -fprofile-abs-path -fprofile-arcs -ftest-coverage --coverage"
"-Og -g3 -fno-omit-frame-pointer -fprofile-abs-path -fprofile-arcs -ftest-coverage --coverage -DCODECOV=1"
CACHE STRING "${_LANG} flags for code coverage builds")
set(CMAKE_${_LANG}_FLAGS_COVERAGE
"${CMAKE_${_LANG}_FLAGS_COVERAGE_INIT}"
+4 -4
Просмотреть файл
@@ -211,10 +211,10 @@ tool_tracing_callback(rocprofiler_context_id_t context,
static_cast<rocprofiler_buffer_tracing_hsa_api_record_t*>(header->payload);
auto info = std::stringstream{};
info << "tid=" << record->thread_id << ", context=" << context.handle
<< ", buffer_id=" << buffer_id.handle << ", cid=" << record->correlation_id.id
<< ", kind=" << record->kind << ", operation=" << record->operation
<< ", drop_count=" << drop_count << ", start=" << record->start_timestamp
<< ", stop=" << record->end_timestamp;
<< ", buffer_id=" << buffer_id.handle
<< ", cid=" << record->correlation_id.internal << ", kind=" << record->kind
<< ", operation=" << record->operation << ", drop_count=" << drop_count
<< ", start=" << record->start_timestamp << ", stop=" << record->end_timestamp;
if(record->start_timestamp > record->end_timestamp)
throw std::runtime_error("start > end");
+4 -2
Просмотреть файл
@@ -167,12 +167,14 @@ store_callback_id_names(call_stack_t* tool_data)
}
void
tool_tracing_callback(rocprofiler_callback_tracing_record_t record, void* user_data)
tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
rocprofiler_user_data_t*,
void* user_data)
{
assert(user_data != nullptr);
auto info = std::stringstream{};
info << "tid=" << record.thread_id << ", cid=" << record.correlation_id.id
info << "tid=" << record.thread_id << ", cid=" << record.correlation_id.internal
<< ", kind=" << record.kind << ", operation=" << record.operation
<< ", phase=" << record.phase;
+1 -1
Просмотреть файл
@@ -44,7 +44,7 @@ test_callback(rocprofiler_queue_id_t queue_id,
{
// Callback containing counter data.
std::clog << "[" << __FUNCTION__ << "] " << queue_id.handle << " | " << agent_id.id.handle
<< " | " << corr_id.id << "\n";
<< " | " << corr_id.internal << "\n";
}
int
+1 -2
Просмотреть файл
@@ -200,8 +200,7 @@ typedef struct
{
rocprofiler_service_buffer_tracing_kind_t kind;
rocprofiler_correlation_id_t correlation_id;
rocprofiler_external_correlation_id_t external_correlation_id;
} rocprofiler_buffer_tracing_external_correlation_record_t;
} rocprofiler_buffer_tracing_correlation_record_t;
/**
* @brief Callback function for mapping @ref rocprofiler_service_buffer_tracing_kind_t ids to
+42 -10
Просмотреть файл
@@ -132,10 +132,26 @@ typedef struct
} rocprofiler_callback_tracer_code_object_register_host_kernel_symbol_data_t;
/**
* @brief API Tracing callback function.
* @brief API Tracing callback function. This function is invoked twice per API function: once
* before the function is invoked and once after the function is invoked. The external correlation
* id value within the record is assigned the value at the top of the external correlation id stack.
* It is permissible to invoke @ref rocprofiler_push_external_correlation_id within the enter phase;
* when a new external correlation id is pushed during the enter phase, rocprofiler will use that
* external correlation id for any async events and provide the new external correlation id during
* the exit callback... In other words, pushing a new external correlation id within the enter
* callback will result in that external correlation id value in the exit callback (which may or may
* not be different from the external correlation id value in the enter callback). If a tool pushes
* new external correlation ids in the enter phase, it is recommended to pop the external
* correlation id in the exit callback.
*
* @param record [in] Callback record data
* @param user_data [in,out] This parameter can be used to retain information in between the enter
* and exit phases.
* @param callback_data [in] User data provided when configuring the callback tracing service
*/
typedef void (*rocprofiler_callback_tracing_cb_t)(rocprofiler_callback_tracing_record_t record,
void* user_data);
rocprofiler_user_data_t* user_data,
void* callback_data) ROCPROFILER_NONNULL(2);
/**
* @brief Callback function for mapping @ref rocprofiler_service_callback_tracing_kind_t ids to
@@ -180,15 +196,31 @@ typedef int (*rocprofiler_callback_tracing_operation_args_cb_t)(
void* data);
/**
* @brief Configure Callback Tracing Service.
* @brief Configure Callback Tracing Service. The callback tracing service provides two synchronous
* callbacks around an API function on the same thread as the application which is invoking the API
* function. This function can only be invoked once per @ref
* rocprofiler_service_callback_tracing_kind_t value, i.e. it can be invoked once for the HSA API,
* once for the HIP API, and so on but it will fail if it is invoked for the HSA API twice. Please
* note, the callback API does have the potentially non-trivial overhead of copying the function
* arguments into the record. If you are willing to let rocprofiler record the timestamps, do not
* require synchronous notifications of the API calls, and want the lowest possible overhead, use the
* @see BUFFER_TRACING_SERVICE.
*
* @param [in] context_id
* @param [in] kind
* @param [in] operations
* @param [in] operations_count
* @param [in] callback
* @param [in] callback_args
* @return ::rocprofiler_status_t
* @param [in] context_id Context to associate the service with
* @param [in] kind The domain of the callback tracing service
* @param [in] operations Array of operations in the domain (i.e. enum values which identify
* specific API functions). If this is null, all API functions in the domain will be traced
* @param [in] operations_count If the operations array is non-null, set this to the size of the
* array.
* @param [in] callback The function to invoke before and after an API function
* @param [in] callback_args Data provided to every invocation of the callback function
* @return ::rocprofiler_status_t Will return @ref ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED if
* invoked outside of the initialization function in @ref rocprofiler_tool_configure_result_t
* provided to rocprofiler via @ref rocprofiler_configure function. Will return @ref
* ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND if the provided context is not valid/registered. Will
* return @ref ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGURED if the same @ref
* rocprofiler_service_callback_tracing_kind_t value is provided more than once (per context) -- in
* other words, we do not support overriding or combining the operations in separate function calls.
*
*/
rocprofiler_status_t ROCPROFILER_API
+19 -10
Просмотреть файл
@@ -37,23 +37,32 @@ ROCPROFILER_EXTERN_C_INIT
/** @} */
/**
* @brief ROCProfiler Push External Correlation ID.
* @brief Push default value for `external` field in @ref rocprofiler_correlation_id_t onto stack.
*
* @param external_correlation_id
* @return rocprofiler_status_t
* @param context [in] Associated context
* @param tid [in] thread identifier. @see rocprofiler_get_thread_id
* @param external_correlation_id [in] User data to place in external field in @ref
* rocprofiler_correlation_id_t
* @return rocprofiler_status_t Returns ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND if the context
* does not exist. Returns ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT if thread id is not valid.
*/
rocprofiler_status_t ROCPROFILER_API
rocprofiler_push_external_correlation_id(
rocprofiler_external_correlation_id_t external_correlation_id);
rocprofiler_push_external_correlation_id(rocprofiler_context_id_t context,
rocprofiler_thread_id_t tid,
rocprofiler_user_data_t external_correlation_id);
/**
* @brief ROCProfiler Push External Correlation ID.
* @brief Pop default value for `external` field in @ref rocprofiler_correlation_id_t off of stack
*
* @param external_correlation_id
* @return rocprofiler_status_t
* @param context [in] Associated context
* @param tid [in] thread identifier. @see rocprofiler_get_thread_id
* @param external_correlation_id [out] Correlation id data popped off the stack
* @return rocprofiler_status_t Returns ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND if the context
* does not exist. Returns ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT if thread id is not valid.
*/
rocprofiler_status_t ROCPROFILER_API
rocprofiler_pop_external_correlation_id(
rocprofiler_external_correlation_id_t* external_correlation_id);
rocprofiler_pop_external_correlation_id(rocprofiler_context_id_t context,
rocprofiler_thread_id_t tid,
rocprofiler_user_data_t* external_correlation_id);
ROCPROFILER_EXTERN_C_FINI
+7 -13
Просмотреть файл
@@ -54,6 +54,8 @@ typedef enum // NOLINT(performance-enum-size)
ROCPROFILER_STATUS_ERROR_DOMAIN_NOT_FOUND, ///< Domain identifier is invalid
ROCPROFILER_STATUS_ERROR_OPERATION_NOT_FOUND, ///< Operation identifier is invalid for domain
ROCPROFILER_STATUS_ERROR_THREAD_NOT_FOUND, ///< No valid thread for given thread id
ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND, ///< Agent identifier not found
ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND, ///< Counter identifier does not exist
ROCPROFILER_STATUS_ERROR_CONTEXT_ERROR, ///> Generalized context error
ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID, ///< Context configuration is not valid
ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_STARTED, ///< Context was not started (maybe already
@@ -69,8 +71,8 @@ typedef enum // NOLINT(performance-enum-size)
ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED, ///< Function is not implemented
ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI, ///< Data structure provided by user is incompatible
///< with current version of rocprofiler
ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND, ///< Agent not found
ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND, ///< Counter does not exist
ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT, ///< Function invoked with one or more invalid
///< arguments
ROCPROFILER_STATUS_LAST,
} rocprofiler_status_t;
@@ -282,17 +284,10 @@ typedef struct
*/
typedef struct
{
uint64_t id;
uint64_t internal;
rocprofiler_user_data_t external;
} rocprofiler_correlation_id_t;
/**
* @brief ROCProfiler External Correlation ID.
*/
typedef struct
{
uint64_t id;
} rocprofiler_external_correlation_id_t;
/**
* @struct rocprofiler_buffer_id_t
* @brief Buffer ID.
@@ -332,13 +327,12 @@ typedef struct
*/
typedef struct rocprofiler_callback_tracing_record_t
{
rocprofiler_context_id_t context_id;
rocprofiler_thread_id_t thread_id;
rocprofiler_correlation_id_t correlation_id;
rocprofiler_external_correlation_id_t external_correlation_id;
rocprofiler_service_callback_tracing_kind_t kind;
uint32_t operation;
rocprofiler_service_callback_phase_t phase;
rocprofiler_user_data_t data;
void* payload;
} rocprofiler_callback_tracing_record_t;
+26
Просмотреть файл
@@ -97,6 +97,32 @@ ROCPROFILER_EXTERN_C_INIT
rocprofiler_status_t
rocprofiler_get_timestamp(rocprofiler_timestamp_t* ts) ROCPROFILER_API ROCPROFILER_NONNULL(1);
/**
* @fn rocprofiler_status_t rocprofiler_get_thread_id(rocprofiler_thread_id_t* tid)
* @brief Get the identifier value of the current thread that is used by rocprofiler
* @param [out] tid Output address of the rocprofiler thread id value
*/
rocprofiler_status_t
rocprofiler_get_thread_id(rocprofiler_thread_id_t* tid) ROCPROFILER_API ROCPROFILER_NONNULL(1);
/**
* @fn const char* rocprofiler_get_status_name(rocprofiler_status_t status)
* @brief Return the string encoding of @ref rocprofiler_status_t value
* @param [in] status error code value
* @return Will return a nullptr if invalid/unsupported @ref rocprofiler_status_t value is provided.
*/
const char*
rocprofiler_get_status_name(rocprofiler_status_t status) ROCPROFILER_API;
/**
* @fn const char* rocprofiler_get_status_string(rocprofiler_status_t status)
* @brief Return the message associated with @ref rocprofiler_status_t value
* @param [in] status error code value
* @return Will return a nullptr if invalid/unsupported @ref rocprofiler_status_t value is provided.
*/
const char*
rocprofiler_get_status_string(rocprofiler_status_t status) ROCPROFILER_API;
/** @} */
ROCPROFILER_EXTERN_C_FINI
+62 -17
Просмотреть файл
@@ -26,6 +26,7 @@
#include <functional>
#include <mutex>
#include <shared_mutex>
#include <type_traits>
namespace rocprofiler
{
@@ -52,45 +53,89 @@ namespace common
* // set data to new value
* });
*/
template <typename LockedType>
template <typename LockedType, bool IsMappedTypeV = false>
class Synchronized
{
public:
Synchronized() = default;
Synchronized(LockedType&& data)
: data_(std::move(data))
using value_type = LockedType;
using this_type = Synchronized<value_type, IsMappedTypeV>;
Synchronized() = default;
~Synchronized() = default;
explicit Synchronized(value_type&& data)
: m_data{std::move(data)}
{}
Synchronized(Synchronized&& data) noexcept = default;
Synchronized& operator=(Synchronized&& data) noexcept = default;
// Do not allow this data structure to be copied, std::move only.
Synchronized(const Synchronized&) = delete;
Synchronized& operator=(const Synchronized&) = delete;
void rlock(std::function<void(const LockedType&)> lambda) const
template <typename FuncT, typename... Args>
auto rlock(FuncT&& lambda, Args&&... args) const
{
std::shared_lock lock(mutex_);
lambda(data_);
static_assert(std::is_invocable<FuncT, const value_type&, Args...>::value,
"function must accept const reference to locked type");
auto lock = std::shared_lock{m_mutex};
return std::forward<FuncT>(lambda)(m_data, std::forward<Args>(args)...);
}
void wlock(std::function<void(LockedType&)> lambda)
template <typename FuncT, typename... Args>
auto wlock(FuncT&& lambda, Args&&... args)
{
std::unique_lock lock(mutex_);
lambda(data_);
static_assert(std::is_invocable<FuncT, value_type&, Args...>::value,
"function must accept reference to locked type");
auto lock = std::unique_lock{m_mutex};
return std::forward<FuncT>(lambda)(m_data, std::forward<Args>(args)...);
}
// This overload to wlock allows a synchronized map whose keys map to synchronized data to
// use a read lock on the key data and then a write lock on the mapped data.
template <typename FuncT,
typename... Args,
bool EnableForMappedType = IsMappedTypeV,
std::enable_if_t<EnableForMappedType, int> = 0>
auto wlock(FuncT&& lambda, Args&&... args) const
{
return const_cast<this_type*>(this)->wlock(std::forward<FuncT>(lambda),
std::forward<Args>(args)...);
}
// Upgradable lock. If read returns false, write will be called with a unique_lock.
// Essentially a helper function that does .rlock() followed by .wlock().
void ulock(std::function<bool(const LockedType&)> read, std::function<bool(LockedType&)> write)
template <typename ReadFuncT, typename WriteFuncT, typename... Args>
bool ulock(ReadFuncT&& read, WriteFuncT&& write, Args&&... args)
{
static_assert(std::is_invocable<ReadFuncT, const value_type&, Args...>::value,
"read function must accept const reference to locked type");
static_assert(std::is_invocable<WriteFuncT, value_type&, Args...>::value,
"write function must accept reference to locked type");
using read_return_type = std::invoke_result_t<ReadFuncT, const value_type&, Args...>;
using write_return_type = std::invoke_result_t<WriteFuncT, value_type&, Args...>;
static_assert(std::is_same<read_return_type, write_return_type>::value,
"read and write functions must return same type");
static_assert(std::is_same<read_return_type, bool>::value,
"read/write functions must return bool");
{
std::shared_lock lock(mutex_);
if(read(data_)) return;
auto lock = std::shared_lock{m_mutex};
if(read(m_data, std::forward<Args>(args)...)) return true;
}
std::unique_lock lock(mutex_);
write(data_);
auto lock = std::unique_lock{m_mutex};
return write(m_data, std::forward<Args>(args)...);
}
private:
mutable std::shared_mutex mutex_;
LockedType data_;
mutable std::shared_mutex m_mutex = {};
value_type m_data = {};
};
} // namespace common
} // namespace rocprofiler
+3 -1
Просмотреть файл
@@ -3,7 +3,8 @@
#
rocprofiler_activate_clang_tidy()
set(ROCPROFILER_LIB_HEADERS buffer.hpp internal_threading.hpp registration.hpp)
set(ROCPROFILER_LIB_HEADERS buffer.hpp external_correlation.hpp internal_threading.hpp
registration.hpp)
set(ROCPROFILER_LIB_SOURCES
agent.cpp
buffer.cpp
@@ -12,6 +13,7 @@ set(ROCPROFILER_LIB_SOURCES
context.cpp
counters.cpp
dispatch_profile.cpp
external_correlation.cpp
internal_threading.cpp
pc_sampling.cpp
profile_config.cpp
+1 -1
Просмотреть файл
@@ -65,7 +65,7 @@ rocprofiler_context_is_active(rocprofiler_context_id_t context_id, int* status)
*status = 0;
for(const auto& itr : rocprofiler::context::get_active_contexts())
{
auto* cfg = itr.load(std::memory_order_relaxed);
const auto* cfg = itr.load(std::memory_order_relaxed);
if(cfg && cfg->context_idx == context_id.handle)
{
*status = 1;
+6 -6
Просмотреть файл
@@ -61,7 +61,7 @@ get_client_index()
} // namespace
uint64_t
correlation_tracing_service::get_unique_record_id()
correlation_tracing_service::get_unique_internal_id()
{
static auto _v = std::atomic<uint64_t>{};
return _v++;
@@ -181,7 +181,7 @@ start_context(rocprofiler_context_id_t context_id)
// try to find a nullptr slot first
for(size_t i = 0; i < get_active_contexts().size(); ++i)
{
auto* itr = get_active_contexts().at(i).load(std::memory_order_relaxed);
const auto* itr = get_active_contexts().at(i).load(std::memory_order_relaxed);
if(itr == nullptr)
{
idx = i;
@@ -201,8 +201,8 @@ start_context(rocprofiler_context_id_t context_id)
}
// atomic swap the pointer into the "active" array used internally
context* _expected = nullptr;
bool success = get_active_contexts().at(idx).compare_exchange_strong(
const context* _expected = nullptr;
bool success = get_active_contexts().at(idx).compare_exchange_strong(
_expected, get_registered_contexts().at(context_id.handle).get());
if(!success) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_STARTED;
@@ -219,7 +219,7 @@ stop_context(rocprofiler_context_id_t idx)
// callbacks
for(auto& itr : get_active_contexts())
{
auto* _expected = itr.load(std::memory_order_relaxed);
const auto* _expected = itr.load(std::memory_order_relaxed);
if(_expected && _expected->context_idx == idx.handle)
{
bool success = itr.compare_exchange_strong(_expected, nullptr);
@@ -237,7 +237,7 @@ deactivate_client_contexts(rocprofiler_client_id_t client_id)
{
for(auto& itr : get_active_contexts())
{
auto* itr_v = itr.load();
const auto* itr_v = itr.load();
if(itr_v->client_idx == client_id.handle)
{
itr.store(nullptr);
+7 -8
Просмотреть файл
@@ -27,8 +27,10 @@
#include <rocprofiler/rocprofiler.h>
#include "lib/common/container/stable_vector.hpp"
#include "lib/common/synchronized.hpp"
#include "lib/rocprofiler/context/domain.hpp"
#include "lib/rocprofiler/counters/core.hpp"
#include "lib/rocprofiler/external_correlation.hpp"
#include <array>
#include <atomic>
@@ -48,11 +50,8 @@ using external_cid_cb_t = uint64_t (*)(rocprofiler_service_callback_tracing_kind
/// the rocprofiler generated correlation id
struct correlation_tracing_service
{
uint64_t id = 0;
uint64_t external_id = 0;
external_cid_cb_t external_id_callback = nullptr;
static uint64_t get_unique_record_id();
external_correlation::external_correlation external_correlator = {};
static uint64_t get_unique_internal_id();
};
struct callback_tracing_service
@@ -85,11 +84,11 @@ struct counter_collection_service
// Each instance is assocated with an agent and a counter collection profile.
// Contains callback information along with other data needed to collect/process
// counters.
std::vector<std::shared_ptr<rocprofiler::counters::counter_callback_info>> callbacks{};
std::vector<std::shared_ptr<counters::counter_callback_info>> callbacks{};
// A flag to state wether or not the counter set is currently enabled. This is primarily
// to protect against multithreaded calls to enable a context (and enabling already enabled
// counters).
rocprofiler::common::Synchronized<bool> enabled{false};
common::Synchronized<bool> enabled{false};
};
struct context
@@ -135,7 +134,7 @@ start_context(rocprofiler_context_id_t id);
rocprofiler_status_t stop_context(rocprofiler_context_id_t);
using unique_context_vec_t = common::container::stable_vector<std::unique_ptr<context>, 8>;
using active_context_vec_t = common::container::stable_vector<std::atomic<context*>, 8>;
using active_context_vec_t = common::container::stable_vector<std::atomic<const context*>, 8>;
unique_context_vec_t&
get_registered_contexts();
+150
Просмотреть файл
@@ -0,0 +1,150 @@
// MIT License
//
// Copyright (c) 2023 ROCm Developer Tools
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <rocprofiler/external_correlation.h>
#include <rocprofiler/fwd.h>
#include "lib/common/synchronized.hpp"
#include "lib/rocprofiler/context/context.hpp"
#include "lib/rocprofiler/external_correlation.hpp"
#include <unistd.h>
namespace rocprofiler
{
namespace external_correlation
{
/// Return the external correlation value at the top of the stack belonging to `tid`.
///
/// A zero-valued rocprofiler_user_data_t is returned when the thread id has no entry in the
/// map or when its stack is empty. Both the map and the per-thread stack are only read-locked.
rocprofiler_user_data_t
external_correlation::get(rocprofiler_thread_id_t tid) const
{
    static constexpr auto empty_user_data = rocprofiler_user_data_t{.value = 0};

    return data.rlock(
        [](const external_correlation_map_t& tid_map, rocprofiler_thread_id_t key) {
            // single lookup: a missing key means nothing was ever pushed for this thread
            auto entry = tid_map.find(key);
            if(entry == tid_map.end()) return empty_user_data;

            // peek at the most recently pushed value without removing it
            return entry->second.rlock([](const external_correlation_stack_t& stack) {
                return stack.empty() ? empty_user_data : stack.back();
            });
        },
        tid);
}
/// Push `user_data` onto the external correlation stack associated with `tid`, creating the
/// stack for that thread id first if it does not exist yet.
void
external_correlation::push(rocprofiler_thread_id_t tid, rocprofiler_user_data_t user_data)
{
    // read-side check: does the map already hold a stack for this thread id?
    auto has_stack = [](const external_correlation_map_t& tid_map,
                        rocprofiler_thread_id_t           key) {
        return (tid_map.find(key) != tid_map.end());
    };

    // write-side fallback: insert an empty stack for this thread id
    auto add_stack = [](external_correlation_map_t& tid_map, rocprofiler_thread_id_t key) {
        tid_map.emplace(key, external_correlation_stack_t{});
        return true;
    };

    // repeat until ulock reports that the key is present
    while(!data.ulock(has_stack, add_stack, tid))
    {}

    // The key is now known to exist, so a read lock on the map is sufficient to locate the
    // stack. The mapped type enables the `wlock(...) const` overload, which lets us take a
    // write lock on just this thread's stack from within the read lock. Taking a write lock
    // on the map itself would needlessly block other threads which are writing to the stack
    // of a different thread.
    data.rlock(
        [](const external_correlation_map_t& tid_map,
           rocprofiler_thread_id_t           key,
           rocprofiler_user_data_t           pushed) {
            auto append = [](external_correlation_stack_t& stack,
                             rocprofiler_user_data_t       entry) { stack.emplace_back(entry); };
            tid_map.at(key).wlock(append, pushed);
        },
        tid,
        user_data);
}
/// Remove and return the external correlation value at the top of the stack belonging to `tid`.
///
/// A zero-valued rocprofiler_user_data_t is returned when the thread id has no entry in the
/// map or when its stack is empty; in both cases nothing is modified.
rocprofiler_user_data_t
external_correlation::pop(rocprofiler_thread_id_t tid)
{
    static constexpr auto empty_user_data = rocprofiler_user_data_t{.value = 0};

    // Popping never adds or removes keys from the map, so only a read lock is taken on the
    // map and the write lock is limited to the stack of this thread (via the
    // `wlock(...) const` overload enabled for the mapped type). This mirrors push() and
    // avoids blocking threads which are operating on the stack of another thread.
    return data.rlock(
        [](const external_correlation_map_t& _data, rocprofiler_thread_id_t tid_v) {
            // single lookup instead of count() followed by at()
            auto itr = _data.find(tid_v);
            if(itr == _data.end()) return empty_user_data;

            return itr->second.wlock([](external_correlation_stack_t& data_stack) {
                if(data_stack.empty()) return empty_user_data;
                auto ret = data_stack.back();
                data_stack.pop_back();
                return ret;
            });
        },
        tid);
}
} // namespace external_correlation
} // namespace rocprofiler
extern "C" {
/// C API entry point: push `external_correlation_id` onto the external correlation stack of
/// thread `tid` for the registered context identified by `context`.
///
/// Returns ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT when `tid` is smaller than the process id,
/// ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND when no registered context has a matching index,
/// and ROCPROFILER_STATUS_SUCCESS otherwise.
rocprofiler_status_t
rocprofiler_push_external_correlation_id(rocprofiler_context_id_t context,
                                         rocprofiler_thread_id_t  tid,
                                         rocprofiler_user_data_t  external_correlation_id)
{
    // assumption is that thread ids are monotonically increasing from the pid,
    // so any value below the (cached) pid is rejected
    static uint64_t pid_v = getpid();
    if(pid_v > tid) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;

    for(auto& registered : rocprofiler::context::get_registered_contexts())
    {
        if(registered->context_idx != context.handle) continue;

        registered->correlation_tracer.external_correlator.push(tid, external_correlation_id);
        return ROCPROFILER_STATUS_SUCCESS;
    }

    return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
}
/// C API entry point: pop the top value off the external correlation stack of thread `tid`
/// for the registered context identified by `context`.
///
/// The popped value is written to `external_correlation_id` when that pointer is non-null;
/// the pop is performed either way. Returns ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT when
/// `tid` is smaller than the process id, ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND when no
/// registered context has a matching index, and ROCPROFILER_STATUS_SUCCESS otherwise.
rocprofiler_status_t
rocprofiler_pop_external_correlation_id(rocprofiler_context_id_t context,
                                        rocprofiler_thread_id_t  tid,
                                        rocprofiler_user_data_t* external_correlation_id)
{
    // assumption is that thread ids are monotonically increasing from the pid,
    // so any value below the (cached) pid is rejected
    static uint64_t pid_v = getpid();
    if(pid_v > tid) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;

    for(auto& registered : rocprofiler::context::get_registered_contexts())
    {
        if(registered->context_idx != context.handle) continue;

        auto popped = registered->correlation_tracer.external_correlator.pop(tid);
        // the output pointer is optional
        if(external_correlation_id) *external_correlation_id = popped;
        return ROCPROFILER_STATUS_SUCCESS;
    }

    return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND;
}
}
+61
Просмотреть файл
@@ -0,0 +1,61 @@
// MIT License
//
// Copyright (c) 2023 ROCm Developer Tools
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <rocprofiler/external_correlation.h>
#include <rocprofiler/fwd.h>
#include "lib/common/defines.hpp"
#include "lib/common/synchronized.hpp"
#include "lib/common/utility.hpp"
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>
namespace rocprofiler
{
namespace external_correlation
{
// Value passed as the second template argument of common::Synchronized for the mapped type
// below; `true` turns on the `wlock(...) const` overload of that class.
static constexpr bool enable_const_wlock_v = true;
// Stack of user-provided external correlation values (the most recent push is at the back).
using external_correlation_stack_t = std::vector<rocprofiler_user_data_t>;
// we enable the wlock(...) const for the mapped type so that we can use wlock on the mapped type
// within a rlock of the external correlation map
// Map from a thread id to that thread's individually synchronized stack.
using external_correlation_map_t =
std::unordered_map<rocprofiler_thread_id_t,
common::Synchronized<external_correlation_stack_t, enable_const_wlock_v>>;
/// Holds one stack of external correlation values per thread id. The thread map is wrapped
/// in a common::Synchronized instance and is only reachable through the member functions.
struct external_correlation
{
    /// Query the current external correlation value for thread @p tid.
    rocprofiler_user_data_t get(rocprofiler_thread_id_t tid) const;

    /// Push @p user_data onto the stack for thread @p tid.
    void push(rocprofiler_thread_id_t tid, rocprofiler_user_data_t user_data);

    /// Pop a value off the stack for thread @p tid and return it.
    rocprofiler_user_data_t pop(rocprofiler_thread_id_t tid);

private:
    common::Synchronized<external_correlation_map_t> data = {};
};
} // namespace external_correlation
} // namespace rocprofiler
+2 -2
Просмотреть файл
@@ -29,7 +29,7 @@
#include "lib/common/utility.hpp"
// For Pre-ROCm 6.0 releases
#ifndef HSA_AMD_AGENT_INFO_NEAREST_CPU
#if ROCPROFILER_HSA_RUNTIME_VERSION <= 100900
# define HSA_AMD_AGENT_INFO_NEAREST_CPU 0xA113
#endif
@@ -213,4 +213,4 @@ AgentCache::AgentCache(rocprofiler_agent_t agent_t,
}
} // namespace hsa
} // namespace rocprofiler
} // namespace rocprofiler
+51 -38
Просмотреть файл
@@ -179,20 +179,23 @@ hsa_api_impl<Idx>::functor(Args&&... args)
struct callback_context_data
{
context::context* ctx = nullptr;
rocprofiler_callback_tracing_record_t record = {};
const context::context* ctx = nullptr;
rocprofiler_callback_tracing_record_t record = {};
rocprofiler_user_data_t user_data = {.value = 0};
};
struct buffered_context_data
{
context::context* ctx = nullptr;
const context::context* ctx = nullptr;
rocprofiler_user_data_t external_correlation = {};
};
auto thr_id = common::get_tid();
auto callback_contexts = std::vector<callback_context_data>{};
auto buffered_contexts = std::vector<buffered_context_data>{};
for(const auto& aitr : context::get_active_contexts())
{
auto* itr = aitr.load();
const auto* itr = aitr.load();
if(!itr) continue;
if(itr->callback_tracer)
@@ -209,7 +212,8 @@ hsa_api_impl<Idx>::functor(Args&&... args)
// if the given domain + op is not enabled, skip this context
if(itr->buffered_tracer->domains(info_type::buffered_domain_idx,
info_type::operation_idx))
buffered_contexts.emplace_back(buffered_context_data{itr});
buffered_contexts.emplace_back(buffered_context_data{
itr, itr->correlation_tracer.external_correlator.get(thr_id)});
}
}
@@ -222,19 +226,21 @@ hsa_api_impl<Idx>::functor(Args&&... args)
return HSA_STATUS_SUCCESS;
}
auto buffer_record = rocprofiler_buffer_tracing_hsa_api_record_t{};
auto tracer_data = rocprofiler_hsa_api_callback_tracer_data_t{};
auto corr_id = context::correlation_tracing_service::get_unique_record_id();
auto thr_id = common::get_tid();
constexpr auto empty_user_data = rocprofiler_user_data_t{.value = 0};
auto buffer_record = rocprofiler_buffer_tracing_hsa_api_record_t{};
auto tracer_data = rocprofiler_hsa_api_callback_tracer_data_t{};
auto internal_corr_id = context::correlation_tracing_service::get_unique_internal_id();
// construct the buffered info before the callback so the callbacks are as closely wrapped
// around the function call as possible
if(!buffered_contexts.empty())
{
buffer_record.kind = info_type::buffered_domain_idx;
buffer_record.correlation_id = rocprofiler_correlation_id_t{corr_id};
buffer_record.operation = info_type::operation_idx;
buffer_record.thread_id = thr_id;
buffer_record.kind = info_type::buffered_domain_idx;
// external correlation will be updated right before record is placed in buffer
buffer_record.correlation_id =
rocprofiler_correlation_id_t{internal_corr_id, empty_user_data};
buffer_record.operation = info_type::operation_idx;
buffer_record.thread_id = thr_id;
}
// invoke the callbacks
@@ -245,38 +251,39 @@ hsa_api_impl<Idx>::functor(Args&&... args)
for(auto& itr : callback_contexts)
{
auto& ctx = itr.ctx;
auto& record = itr.record;
auto& ctx = itr.ctx;
auto& record = itr.record;
auto& user_data = itr.user_data;
uint64_t extern_corr_id = 0;
auto& _correlation = ctx->correlation_tracer;
if(_correlation.external_id_callback)
{
_correlation.external_id = _correlation.external_id_callback(
info_type::callback_domain_idx, info_type::operation_idx, corr_id);
extern_corr_id = _correlation.external_id;
}
auto user_data = rocprofiler_user_data_t{.value = 0};
record = rocprofiler_callback_tracing_record_t{
thr_id,
rocprofiler_correlation_id_t{corr_id},
rocprofiler_external_correlation_id_t{extern_corr_id},
info_type::callback_domain_idx,
info_type::operation_idx,
ROCPROFILER_SERVICE_CALLBACK_PHASE_ENTER,
user_data,
static_cast<void*>(&tracer_data)};
auto corr_id = rocprofiler_correlation_id_t{
internal_corr_id, ctx->correlation_tracer.external_correlator.get(thr_id)};
record =
rocprofiler_callback_tracing_record_t{rocprofiler_context_id_t{ctx->context_idx},
thr_id,
corr_id,
info_type::callback_domain_idx,
info_type::operation_idx,
ROCPROFILER_SERVICE_CALLBACK_PHASE_ENTER,
static_cast<void*>(&tracer_data)};
auto& callback_info =
ctx->callback_tracer->callback_data.at(info_type::callback_domain_idx);
callback_info.callback(record, callback_info.data);
callback_info.callback(record, &user_data, callback_info.data);
// enter callback may update the external correlation id field
record.correlation_id.external =
ctx->correlation_tracer.external_correlator.get(thr_id);
}
}
// record the start timestamp as close to the function call as possible
if(!buffered_contexts.empty())
{
for(auto& itr : buffered_contexts)
{
itr.external_correlation = itr.ctx->correlation_tracer.external_correlator.get(thr_id);
}
buffer_record.start_timestamp = common::timestamp_ns();
}
@@ -294,15 +301,16 @@ hsa_api_impl<Idx>::functor(Args&&... args)
for(auto& itr : callback_contexts)
{
auto& ctx = itr.ctx;
auto& record = itr.record;
auto& ctx = itr.ctx;
auto& record = itr.record;
auto& user_data = itr.user_data;
record.phase = ROCPROFILER_SERVICE_CALLBACK_PHASE_EXIT;
record.payload = static_cast<void*>(&tracer_data);
auto& callback_info =
ctx->callback_tracer->callback_data.at(info_type::callback_domain_idx);
callback_info.callback(record, callback_info.data);
callback_info.callback(record, &user_data, callback_info.data);
}
}
@@ -318,6 +326,11 @@ hsa_api_impl<Idx>::functor(Args&&... args)
if(bitr && bitr->context_id == itr.ctx->context_idx &&
bitr->buffer_id == buffer_id.handle)
{
// make copy of record
auto record_v = buffer_record;
// update the record with the correlation
record_v.correlation_id.external = itr.external_correlation;
bitr->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING,
info_type::buffered_domain_idx,
buffer_record);
+92
Просмотреть файл
@@ -25,6 +25,77 @@
#include "lib/common/utility.hpp"
namespace rocprofiler
{
namespace
{
// Compile-time lookup table: one status_string<N> specialization exists per status value.
// The primary template is intentionally left undefined so that a status code without a
// specialization is a compile error wherever it is looked up.
template <size_t Idx>
struct status_string;

// Defines the status_string specialization for STATUS:
//   name  -> the enumerator spelled as a string (via stringification)
//   value -> the human-readable DESCRIPTION
#define ROCPROFILER_STATUS_STRING(STATUS, DESCRIPTION)                                             \
    template <>                                                                                    \
    struct status_string<STATUS>                                                                   \
    {                                                                                              \
        static constexpr auto name  = #STATUS;                                                     \
        static constexpr auto value = DESCRIPTION;                                                 \
    };

ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_SUCCESS, "Success")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR, "General error")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND, "Context ID not found")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND, "Buffer ID not found")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_DOMAIN_NOT_FOUND, "Domain ID not found")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_OPERATION_NOT_FOUND, "Operation ID not found")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_THREAD_NOT_FOUND, "Thread ID not found")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_CONTEXT_ERROR, "General context error")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND, "Agent ID not found")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND, "HW counter not found")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID,
                          "Context configuration is not valid")
ROCPROFILER_STATUS_STRING(
    ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_STARTED,
    "Context failed to be started (may be already started or atomic swap may have failed)")
ROCPROFILER_STATUS_STRING(
    ROCPROFILER_STATUS_ERROR_BUFFER_BUSY,
    "Buffer operation failed because it is currently busy handling another request")
ROCPROFILER_STATUS_STRING(
    ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGURED,
    "Service configuration request would overwrite existing service configuration values")
ROCPROFILER_STATUS_STRING(
    ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED,
    "Configuration request occurred outside of valid rocprofiler configuration period")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED,
                          "API function is defined but not implemented")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI,
                          "Data structure provided by user has a incompatible binary interface "
                          "with this version of rocprofiler")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT,
                          "Function invoked with one or more invalid arguments")
// Looks up the enumerator name for `status` by comparing it against each index of the
// sequence in turn. Returns nullptr when no index in the sequence equals `status`.
template <size_t Idx, size_t... Tail>
const char*
get_status_name(rocprofiler_status_t status, std::index_sequence<Idx, Tail...>)
{
    if(status != Idx)
    {
        // head did not match: continue with the remaining indices, or give up if none are left
        if constexpr(sizeof...(Tail) == 0)
            return nullptr;
        else
            return get_status_name(status, std::index_sequence<Tail...>{});
    }

    return status_string<Idx>::name;
}
// Looks up the human-readable description for `status` by comparing it against each index of
// the sequence in turn. Returns nullptr when no index in the sequence equals `status`.
template <size_t Idx, size_t... Tail>
const char*
get_status_string(rocprofiler_status_t status, std::index_sequence<Idx, Tail...>)
{
    if(status != Idx)
    {
        // head did not match: continue with the remaining indices, or give up if none are left
        if constexpr(sizeof...(Tail) == 0)
            return nullptr;
        else
            return get_status_string(status, std::index_sequence<Tail...>{});
    }

    return status_string<Idx>::value;
}
} // namespace
} // namespace rocprofiler
extern "C" {
rocprofiler_status_t
rocprofiler_get_version(uint32_t* major, uint32_t* minor, uint32_t* patch)
@@ -41,4 +112,25 @@ rocprofiler_get_timestamp(rocprofiler_timestamp_t* ts)
*ts = rocprofiler::common::timestamp_ns();
return ROCPROFILER_STATUS_SUCCESS;
}
// Stores the value returned by rocprofiler::common::get_tid() in *tid and reports success.
// `tid` is dereferenced unconditionally, so callers must pass a valid pointer.
rocprofiler_status_t
rocprofiler_get_thread_id(rocprofiler_thread_id_t* tid)
{
    const auto current_tid = rocprofiler::common::get_tid();
    *tid                   = current_tid;
    return ROCPROFILER_STATUS_SUCCESS;
}
// Returns the enumerator name for `status`, searching every value in
// [0, ROCPROFILER_STATUS_LAST); nullptr when `status` is outside that range.
const char*
rocprofiler_get_status_name(rocprofiler_status_t status)
{
    using status_indices_t = std::make_index_sequence<ROCPROFILER_STATUS_LAST>;
    return rocprofiler::get_status_name(status, status_indices_t{});
}
// Returns the human-readable description for `status`, searching every value in
// [0, ROCPROFILER_STATUS_LAST); nullptr when `status` is outside that range.
const char*
rocprofiler_get_status_string(rocprofiler_status_t status)
{
    using status_indices_t = std::make_index_sequence<ROCPROFILER_STATUS_LAST>;
    return rocprofiler::get_status_string(status, status_indices_t{});
}
}
+1 -1
Просмотреть файл
@@ -34,7 +34,7 @@ set_tests_properties(${lib_TESTS} PROPERTIES TIMEOUT 45 LABELS "unittests")
#
# -------------------------------------------------------------------------------------- #
set(rocprofiler_shared_lib_sources registration.cpp)
set(rocprofiler_shared_lib_sources external_correlation.cpp registration.cpp status.cpp)
add_executable(rocprofiler-lib-tests-shared)
target_sources(rocprofiler-lib-tests-shared PRIVATE ${rocprofiler_shared_lib_sources})
+503
Просмотреть файл
@@ -0,0 +1,503 @@
// MIT License
//
// Copyright (c) 2023 ROCm Developer Tools
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <rocprofiler/registration.h>
#include <rocprofiler/rocprofiler.h>
#include "lib/common/environment.hpp"
#include "lib/common/units.hpp"
#include "lib/common/utility.hpp"
#include "rocprofiler/external_correlation.h"
#include "rocprofiler/fwd.h"
#include <gtest/gtest.h>
#include <dlfcn.h>
#include <pthread.h>
#include <unistd.h>
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <filesystem>
#include <iostream>
#include <map>
#include <mutex>
#include <random>
#include <sstream>
#include <string_view>
#include <thread>
#include <typeinfo>
#include <unordered_map>
#include <vector>
// Evaluates a rocprofiler API call exactly once and records a non-fatal gtest failure
// (annotated with MSG and the stringified call) when the returned status is not
// ROCPROFILER_STATUS_SUCCESS.
//
// The body is wrapped in do { ... } while(0) so that the macro expands to a single statement:
// a bare { ... } block followed by the caller's ';' breaks unbraced if/else usage.
// Invocations must be terminated with a semicolon.
#define ROCPROFILER_CALL(ARG, MSG)                                                                 \
    do                                                                                             \
    {                                                                                              \
        auto _status = (ARG);                                                                      \
        EXPECT_EQ(_status, ROCPROFILER_STATUS_SUCCESS) << MSG << " :: " << #ARG;                   \
    } while(0)
namespace
{
// State shared between a test body and the tool callbacks it registers with rocprofiler.
struct callback_data
{
    // client handle and finalizer handed to the tool during configuration/initialization
    rocprofiler_client_id_t*      client_id        = nullptr;
    rocprofiler_client_finalize_t client_fini_func = nullptr;
    // handles created by the tool
    rocprofiler_context_id_t      client_ctx{};
    rocprofiler_buffer_id_t       client_buffer{};
    rocprofiler_callback_thread_t client_thread{};
    // counters incremented by the tool init/fini/thread-creation hooks and tracing callbacks
    uint64_t client_workflow_count = 0;
    uint64_t client_callback_count = 0;
    // nesting depth of traced calls currently in flight and the maximum depth observed
    int64_t current_depth = 0;
    int64_t max_depth     = 0;
    // internal correlation id -> external correlation value seen in the ENTER callback
    std::map<uint64_t, rocprofiler_user_data_t> client_correlation{};
};
// Accumulator filled in by the hsa_iterate_agents callback used in the tests.
struct agent_data
{
    // number of times the agent callback was invoked
    uint64_t agent_count = 0;
    // device type of every agent whose HSA_AGENT_INFO_DEVICE query succeeded
    std::vector<hsa_device_type_t> agents{};
};
// Callback-tracing handler that validates external correlation id behaviour.
//
// ENTER phase: records the external value seen for the internal correlation id, stores the
// current timestamp in the user data, checks that the outermost call reports the thread id
// as its external value, and pushes (internal id + 1) * 1000 as a new external value.
//
// EXIT phase: checks that the pushed value is the one reported on the record and the one
// returned by the pop, and validates the timestamp and the nesting depth bookkeeping.
void
tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
                      rocprofiler_user_data_t*              user_data,
                      void*                                 client_data)
{
    // serialize every invocation of this callback: the bookkeeping below is not thread-safe
    static auto callback_mutex = std::mutex{};
    auto        guard          = std::unique_lock{callback_mutex};

    auto*       cb_data   = static_cast<callback_data*>(client_data);
    auto        now       = rocprofiler::common::timestamp_ns();
    static auto first_now = now;  // timestamp taken during the very first invocation

    auto  internal_corr_id = record.correlation_id.internal;
    auto& external_corr_id = record.correlation_id.external;

    ASSERT_NE(cb_data, nullptr);
    cb_data->client_callback_count++;

    // the very first callback delivered must be an ENTER phase
    static auto first_callback = std::once_flag{};
    std::call_once(first_callback, [record]() {
        EXPECT_EQ(record.phase, ROCPROFILER_SERVICE_CALLBACK_PHASE_ENTER);
    });

    if(record.phase != ROCPROFILER_SERVICE_CALLBACK_PHASE_ENTER)
    {
        // EXIT phase: the matching ENTER must have been seen and its push must still be visible
        EXPECT_NE(cb_data->client_correlation.find(internal_corr_id),
                  cb_data->client_correlation.end())
            << "entry for internal correlation id " << internal_corr_id << " does not exist";

        EXPECT_EQ(external_corr_id.value, (internal_corr_id + 1) * 1000)
            << "external correlation id change was not retained";

        auto external_corr_data = rocprofiler_user_data_t{};
        ROCPROFILER_CALL(rocprofiler_pop_external_correlation_id(
                             record.context_id, record.thread_id, &external_corr_data),
                         "Failed to pop external correlation");

        EXPECT_EQ(external_corr_data.value, (internal_corr_id + 1) * 1000)
            << "external correlation pop did not return current external correlation";

        // user data was set to the ENTER timestamp, which must lie in [first_now, now)
        EXPECT_GT(user_data->value, 0) << "user data not set";
        EXPECT_GE(user_data->value, first_now) << "timestamp not monotonically increasing";
        EXPECT_LT(user_data->value, now) << "timestamp not monotonically increasing";

        EXPECT_GT(cb_data->current_depth, 0) << "depth should be > 0";
        cb_data->max_depth = std::max(cb_data->current_depth, cb_data->max_depth);
        cb_data->current_depth--;
        return;
    }

    // ENTER phase: each internal correlation id may only be entered once
    EXPECT_EQ(cb_data->client_correlation.find(internal_corr_id),
              cb_data->client_correlation.end())
        << "entry for internal correlation id " << internal_corr_id << " already exists";

    cb_data->client_correlation[internal_corr_id] = external_corr_id;
    user_data->value                              = now;

    // only the outermost traced call is expected to carry the thread id as external value
    if(cb_data->current_depth++ == 0)
    {
        uint64_t tid = 0;
        ROCPROFILER_CALL(rocprofiler_get_thread_id(&tid), "Failed to get thread id");
        EXPECT_EQ(external_corr_id.value, tid);
    }

    ROCPROFILER_CALL(rocprofiler_push_external_correlation_id(
                         record.context_id,
                         record.thread_id,
                         rocprofiler_user_data_t{.value = (internal_corr_id + 1) * 1000}),
                     "Failed to push new external correlation");
}
// Buffer-tracing handler: validates every delivered header, then checks the HSA API records
// in (start, end) timestamp order and counts each one in callback_data::client_callback_count.
void
tool_tracing_buffered(rocprofiler_context_id_t      context,
                      rocprofiler_buffer_id_t       buffer_id,
                      rocprofiler_record_header_t** headers,
                      size_t                        num_headers,
                      void*                         buffer_data,
                      uint64_t                      drop_count)
{
    using hsa_api_record_t = rocprofiler_buffer_tracing_hsa_api_record_t;

    // retained across records and across invocations: internal ids must keep increasing
    static int64_t last_corr_id = -1;

    std::cout << __FUNCTION__ << "...\n" << std::endl;

    auto* cb_data = static_cast<callback_data*>(buffer_data);

    // collect the payload of every header after validating its hash, category and kind
    auto v_records = std::vector<hsa_api_record_t*>{};
    v_records.reserve(num_headers);
    for(auto** hitr = headers; hitr != headers + num_headers; ++hitr)
    {
        auto* header = *hitr;
        ASSERT_TRUE(header != nullptr);

        auto hash = rocprofiler_record_header_compute_hash(header->category, header->kind);
        EXPECT_EQ(header->hash, hash);
        EXPECT_TRUE(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
                    header->kind == ROCPROFILER_SERVICE_BUFFER_TRACING_HSA_API);

        v_records.emplace_back(static_cast<hsa_api_record_t*>(header->payload));
    }

    // order by start timestamp, breaking ties with the end timestamp
    std::sort(v_records.begin(), v_records.end(), [](auto lhs, auto rhs) {
        if(lhs->start_timestamp != rhs->start_timestamp)
            return lhs->start_timestamp < rhs->start_timestamp;
        return lhs->end_timestamp < rhs->end_timestamp;
    });

    for(auto* record : v_records)
    {
        // description of the record, printed and attached to every failed expectation
        auto info = std::stringstream{};
        info << "tid=" << record->thread_id << ", context=" << context.handle
             << ", buffer_id=" << buffer_id.handle << ", cid=" << record->correlation_id.internal
             << ", kind=" << record->kind << ", operation=" << record->operation
             << ", drop_count=" << drop_count << ", start=" << record->start_timestamp
             << ", stop=" << record->end_timestamp;

        auto corr_id = static_cast<int64_t>(record->correlation_id.internal);

        std::cout << info.str() << "\n" << std::flush;

        EXPECT_GE(context.handle, 0) << info.str();
        EXPECT_GT(record->thread_id, 0) << info.str();
        EXPECT_GT(record->kind, 0) << info.str();
        EXPECT_GT(corr_id, last_corr_id) << info.str();
        EXPECT_GT(record->start_timestamp, 0) << info.str();
        EXPECT_GT(record->end_timestamp, 0) << info.str();
        EXPECT_LE(record->start_timestamp, record->end_timestamp) << info.str();

        cb_data->client_callback_count++;
        last_corr_id = corr_id;
    }
}
// Internal-thread "pre-create" hook: counts the notification in client_workflow_count.
void
thread_precreate(rocprofiler_internal_thread_library_t /*lib*/, void* tool_data)
{
    ++static_cast<callback_data*>(tool_data)->client_workflow_count;
}
// Internal-thread "post-create" hook: counts the notification in client_workflow_count.
void
thread_postcreate(rocprofiler_internal_thread_library_t /*lib*/, void* tool_data)
{
    ++static_cast<callback_data*>(tool_data)->client_workflow_count;
}
} // namespace
// Verifies external correlation ids as observed through HSA API callback tracing:
//  - the outermost traced call on a thread reports the value pushed by that thread (its
//    thread id),
//  - a value pushed inside the ENTER callback is reported in the matching EXIT callback,
//  - pushes/pops performed inside the callbacks do not disturb the value pushed by the
//    caller (the pop after hsa_iterate_agents still returns the thread id).
TEST(rocprofiler_lib, callback_external_correlation)
{
    using init_func_t             = int (*)(rocprofiler_client_finalize_t, void*);
    using fini_func_t             = void (*)(void*);
    using hsa_iterate_agents_cb_t = hsa_status_t (*)(hsa_agent_t, void*);

    auto cmd_line = rocprofiler::common::read_command_line(getpid());
    ASSERT_FALSE(cmd_line.empty());

    // tool initialization: create a context, enable HSA API callback tracing and start it
    static init_func_t tool_init = [](rocprofiler_client_finalize_t fini_func,
                                      void*                         client_data) -> int {
        auto* cb_data = static_cast<callback_data*>(client_data);

        cb_data->client_workflow_count++;
        cb_data->client_fini_func = fini_func;

        ROCPROFILER_CALL(rocprofiler_create_context(&cb_data->client_ctx),
                         "failed to create context");

        ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
                             cb_data->client_ctx,
                             ROCPROFILER_SERVICE_CALLBACK_TRACING_HSA_API,
                             nullptr,
                             0,
                             tool_tracing_callback,
                             client_data),
                         "callback tracing service failed to configure");

        int valid_ctx = 0;
        ROCPROFILER_CALL(rocprofiler_context_is_valid(cb_data->client_ctx, &valid_ctx),
                         "failure checking context validity");

        EXPECT_EQ(valid_ctx, 1);

        ROCPROFILER_CALL(rocprofiler_start_context(cb_data->client_ctx),
                         "rocprofiler context start failed");

        // no errors
        return 0;
    };

    // tool finalization: stop the context and count the finalization step
    static fini_func_t tool_fini = [](void* client_data) -> void {
        auto* cb_data = static_cast<callback_data*>(client_data);
        ROCPROFILER_CALL(rocprofiler_stop_context(cb_data->client_ctx),
                         "rocprofiler context stop failed");

        static_cast<callback_data*>(client_data)->client_workflow_count++;
    };

    static auto cb_data = callback_data{};
    static auto cfg_result =
        rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t),
                                            tool_init,
                                            tool_fini,
                                            static_cast<void*>(&cb_data)};

    static rocprofiler_configure_func_t rocp_init =
        [](uint32_t                 version,
           const char*              runtime_version,
           uint32_t                 prio,
           rocprofiler_client_id_t* client_id) -> rocprofiler_tool_configure_result_t* {
        auto expected_version = ROCPROFILER_VERSION;
        EXPECT_EQ(expected_version, version);
        EXPECT_EQ(std::string_view{runtime_version}, std::string_view{ROCPROFILER_VERSION_STRING});
        EXPECT_EQ(prio, 0);
        EXPECT_EQ(client_id->name, nullptr);
        cb_data.client_id       = client_id;
        cb_data.client_id->name = ::testing::UnitTest::GetInstance()->current_test_info()->name();

        return &cfg_result;
    };

    EXPECT_EQ(rocprofiler_force_configure(rocp_init), ROCPROFILER_STATUS_SUCCESS);

    // seed the main thread's external correlation with its own thread id
    uint64_t tid = 0;
    ROCPROFILER_CALL(rocprofiler_get_thread_id(&tid), "failed to get thread id");
    ROCPROFILER_CALL(rocprofiler_push_external_correlation_id(
                         cb_data.client_ctx, tid, rocprofiler_user_data_t{.value = tid}),
                     "failed to push correlation id");

    // agent callback issues a nested traced call (hsa_agent_get_info) for every agent
    hsa_iterate_agents_cb_t agent_cb = [](hsa_agent_t agent, void* data) {
        static_cast<agent_data*>(data)->agent_count++;

        auto status     = HSA_STATUS_SUCCESS;
        auto agent_type = hsa_device_type_t{};
        if((status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agent_type)) ==
           HSA_STATUS_SUCCESS)
            static_cast<agent_data*>(data)->agents.emplace_back(agent_type);

        return status;
    };

    auto     _agent_data = agent_data{};
    uint64_t num_runs    = 0;

    hsa_init();

    // one unit of traced work: push the thread id, iterate the agents, pop and verify
    auto run = [&agent_cb, &_agent_data, &num_runs]() {
        ++num_runs;
        uint64_t _tid = 0;
        ROCPROFILER_CALL(rocprofiler_get_thread_id(&_tid), "failed to get thread id");
        ROCPROFILER_CALL(rocprofiler_push_external_correlation_id(
                             cb_data.client_ctx, _tid, rocprofiler_user_data_t{.value = _tid}),
                         "failed to push correlation id");

        hsa_status_t itr_status = hsa_iterate_agents(agent_cb, static_cast<void*>(&_agent_data));
        EXPECT_EQ(itr_status, HSA_STATUS_SUCCESS);

        auto user_data = rocprofiler_user_data_t{};
        ROCPROFILER_CALL(
            rocprofiler_pop_external_correlation_id(cb_data.client_ctx, _tid, &user_data),
            "failed to pop correlation id");
        EXPECT_EQ(user_data.value, _tid)
            << "callback modification to external correlation id should not be seen here";
    };

    // run once on the main thread and twice on short-lived threads (joined one at a time)
    run();
    std::thread{run}.join();
    std::thread{run}.join();

    EXPECT_GT(_agent_data.agent_count, 0);
    EXPECT_EQ(_agent_data.agent_count, _agent_data.agents.size());

    ASSERT_NE(cb_data.client_id, nullptr);
    ASSERT_NE(cb_data.client_fini_func, nullptr);

    cb_data.client_fini_func(*cb_data.client_id);

    // expected callback count is two for each hsa_iterate_agents and two callbacks for
    // hsa_agent_get_info for each agent.
    uint64_t expected_cb_count = (2 * num_runs) + (2 * _agent_data.agent_count);

    EXPECT_EQ(cb_data.client_workflow_count, 2);
    EXPECT_EQ(cb_data.client_callback_count, expected_cb_count);
    EXPECT_EQ(cb_data.client_correlation.size(), expected_cb_count / 2);
    EXPECT_EQ(cb_data.current_depth, 0);
    EXPECT_EQ(cb_data.max_depth, 2);
}
// Exercises buffered HSA API tracing end to end: the tool creates a context, a buffer, a
// buffer-tracing service and a dedicated callback thread, the test issues HSA calls, flushes
// the buffer and then checks the workflow and record counters.
// NOTE(review): despite the test name, this body never pushes, pops or inspects an external
// correlation id -- confirm whether that coverage is intended to be added here.
TEST(rocprofiler_lib, buffered_external_correlation)
{
using init_func_t = int (*)(rocprofiler_client_finalize_t, void*);
using fini_func_t = void (*)(void*);
using hsa_iterate_agents_cb_t = hsa_status_t (*)(hsa_agent_t, void*);
auto cmd_line = rocprofiler::common::read_command_line(getpid());
ASSERT_FALSE(cmd_line.empty());
// tool initialization: context + buffer + buffer-tracing service + callback thread, then start
static init_func_t tool_init = [](rocprofiler_client_finalize_t fini_func,
void* client_data) -> int {
auto* cb_data = static_cast<callback_data*>(client_data);
cb_data->client_workflow_count++;
cb_data->client_fini_func = fini_func;
ROCPROFILER_CALL(rocprofiler_create_context(&cb_data->client_ctx),
"failed to create context");
ROCPROFILER_CALL(rocprofiler_create_buffer(cb_data->client_ctx,
4096,
2048,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
tool_tracing_buffered,
client_data,
&cb_data->client_buffer),
"buffer creation failed");
ROCPROFILER_CALL(
rocprofiler_configure_buffer_tracing_service(cb_data->client_ctx,
ROCPROFILER_SERVICE_BUFFER_TRACING_HSA_API,
nullptr,
0,
cb_data->client_buffer),
"buffer tracing service failed to configure");
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&cb_data->client_thread),
"failure creating callback thread");
ROCPROFILER_CALL(
rocprofiler_assign_callback_thread(cb_data->client_buffer, cb_data->client_thread),
"failed to assign thread for buffer");
int valid_ctx = 0;
ROCPROFILER_CALL(rocprofiler_context_is_valid(cb_data->client_ctx, &valid_ctx),
"failure checking context validity");
EXPECT_EQ(valid_ctx, 1);
ROCPROFILER_CALL(rocprofiler_start_context(cb_data->client_ctx),
"rocprofiler context start failed");
// no errors
return 0;
};
// tool finalization: stop the context and count the finalization step
static fini_func_t tool_fini = [](void* client_data) -> void {
auto* cb_data = static_cast<callback_data*>(client_data);
ROCPROFILER_CALL(rocprofiler_stop_context(cb_data->client_ctx),
"rocprofiler context stop failed");
static_cast<callback_data*>(client_data)->client_workflow_count++;
};
static auto cb_data = callback_data{};
static auto cfg_result =
rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t),
tool_init,
tool_fini,
static_cast<void*>(&cb_data)};
// configure entry point: validates the arguments passed by rocprofiler and registers the
// thread-creation hooks that feed client_workflow_count
static rocprofiler_configure_func_t rocp_init =
[](uint32_t version,
const char* runtime_version,
uint32_t prio,
rocprofiler_client_id_t* client_id) -> rocprofiler_tool_configure_result_t* {
auto expected_version = ROCPROFILER_VERSION;
EXPECT_EQ(expected_version, version);
EXPECT_EQ(std::string_view{runtime_version}, std::string_view{ROCPROFILER_VERSION_STRING});
EXPECT_EQ(prio, 0);
EXPECT_EQ(client_id->name, nullptr);
cb_data.client_id = client_id;
cb_data.client_id->name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
ROCPROFILER_CALL(rocprofiler_at_internal_thread_create(thread_precreate,
thread_postcreate,
ROCPROFILER_LIBRARY,
static_cast<void*>(&cb_data)),
"failed to register for thread creation notifications");
return &cfg_result;
};
// creating a context from the test body is expected to fail both before and after the forced
// configuration (presumably contexts may only be created during tool initialization -- confirm)
auto ctx = rocprofiler_context_id_t{};
EXPECT_NE(rocprofiler_create_context(&ctx), ROCPROFILER_STATUS_SUCCESS);
EXPECT_EQ(rocprofiler_force_configure(rocp_init), ROCPROFILER_STATUS_SUCCESS);
EXPECT_NE(rocprofiler_create_context(&ctx), ROCPROFILER_STATUS_SUCCESS);
// agent callback issues a nested traced call (hsa_agent_get_info) for every agent
hsa_iterate_agents_cb_t agent_cb = [](hsa_agent_t agent, void* data) {
static_cast<agent_data*>(data)->agent_count++;
auto status = HSA_STATUS_SUCCESS;
auto agent_type = hsa_device_type_t{};
if((status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agent_type)) ==
HSA_STATUS_SUCCESS)
static_cast<agent_data*>(data)->agents.emplace_back(agent_type);
return status;
};
auto _agent_data = agent_data{};
hsa_init();
hsa_status_t itr_status = hsa_iterate_agents(agent_cb, static_cast<void*>(&_agent_data));
EXPECT_EQ(itr_status, HSA_STATUS_SUCCESS);
EXPECT_GT(_agent_data.agent_count, 0);
EXPECT_EQ(_agent_data.agent_count, _agent_data.agents.size());
ASSERT_NE(cb_data.client_id, nullptr);
ASSERT_NE(cb_data.client_fini_func, nullptr);
// flush before finalizing so the buffered records reach tool_tracing_buffered
EXPECT_EQ(rocprofiler_flush_buffer(cb_data.client_buffer), ROCPROFILER_STATUS_SUCCESS);
cb_data.client_fini_func(*cb_data.client_id);
// expected record count is one for hsa_iterate_agents plus one for the hsa_agent_get_info
// call made for each agent (tool_tracing_buffered counts once per buffered record).
uint64_t expected_cb_count = 1 + _agent_data.agent_count;
// expect the tool init, tool fini, and two calls to thread_precreate and thread_postcreate each
// (the main thread and the assigned thread for the buffer)
uint64_t expected_workflow_count = 6;
EXPECT_EQ(cb_data.client_workflow_count, expected_workflow_count);
EXPECT_EQ(cb_data.client_callback_count, expected_cb_count);
EXPECT_GT(cb_data.client_thread.handle, 0);
// the depth counters are only touched by the callback-tracing handler, which is not used here
EXPECT_EQ(cb_data.current_depth, 0);
EXPECT_EQ(cb_data.max_depth, 0);
}
+5 -3
Просмотреть файл
@@ -69,7 +69,9 @@ struct agent_data
};
void
tool_tracing_callback(rocprofiler_callback_tracing_record_t record, void* client_data)
tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
rocprofiler_user_data_t*,
void* client_data)
{
using name_map_t = std::unordered_map<rocprofiler_service_callback_tracing_kind_t,
std::unordered_map<uint32_t, const char*>>;
@@ -227,13 +229,13 @@ tool_tracing_buffered(rocprofiler_context_id_t context,
{
auto info = std::stringstream{};
info << "tid=" << record->thread_id << ", context=" << context.handle
<< ", buffer_id=" << buffer_id.handle << ", cid=" << record->correlation_id.id
<< ", buffer_id=" << buffer_id.handle << ", cid=" << record->correlation_id.internal
<< ", kind=" << record->kind << ", operation=" << record->operation
<< ", drop_count=" << drop_count << ", start=" << record->start_timestamp
<< ", stop=" << record->end_timestamp;
static int64_t last_corr_id = -1;
auto corr_id = static_cast<int64_t>(record->correlation_id.id);
auto corr_id = static_cast<int64_t>(record->correlation_id.internal);
std::cout << info.str() << "\n" << std::flush;
EXPECT_GE(context.handle, 0) << info.str();
+53
Просмотреть файл
@@ -0,0 +1,53 @@
// MIT License
//
// Copyright (c) 2023 ROCm Developer Tools
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <rocprofiler/fwd.h>
#include <rocprofiler/rocprofiler.h>
#include <gtest/gtest.h>
#include <string_view>
// Every status value in [0, ROCPROFILER_STATUS_LAST) must have a name and a description;
// the success description is pinned exactly, all others must be at least 8 characters long.
TEST(rocprofiler_lib, error_string)
{
    const auto num_status = static_cast<size_t>(ROCPROFILER_STATUS_LAST);

    for(size_t i = 0; i < num_status; ++i)
    {
        auto        status  = static_cast<rocprofiler_status_t>(i);
        const auto* name    = rocprofiler_get_status_name(status);
        const auto* message = rocprofiler_get_status_string(status);

        // fatal: the remaining checks dereference both strings
        ASSERT_NE(name, nullptr) << "idx=" << i;
        ASSERT_NE(message, nullptr) << name << " (idx=" << i << ")";

        std::cout << std::setw(60) << name << " :: " << message << "\n";

        const auto message_view = std::string_view{message};
        if(i != ROCPROFILER_STATUS_SUCCESS)
        {
            EXPECT_GE(message_view.length(), 8)
                << "status message for " << name << " (idx=" << i << ") is too short";
        }
        else
        {
            EXPECT_EQ(message_view, std::string_view{"Success"});
        }
    }
}
+4 -4
Просмотреть файл
@@ -36,15 +36,15 @@ TEST(common, demangling)
using strview_pair_t = std::pair<std::string_view, std::string_view>;
for(auto [mangled, demangled] :
{strview_pair_t{"_ZN11rocprofiler8internal18correlation_config20get_unique_record_idEv",
"rocprofiler::internal::correlation_config::get_unique_record_id()"},
{strview_pair_t{"_ZN11rocprofiler8internal18correlation_config22get_unique_internal_idEv",
"rocprofiler::internal::correlation_config::get_unique_internal_id()"},
strview_pair_t{"_ZN11rocprofiler8internal18get_active_configsEv",
"rocprofiler::internal::get_active_configs()"},
strview_pair_t{"_ZN11rocprofiler8internal22get_registered_configsEv",
"rocprofiler::internal::get_registered_configs()"},
strview_pair_t{
"_ZZN11rocprofiler8internal18correlation_config20get_unique_record_idEvE2_v",
"rocprofiler::internal::correlation_config::get_unique_record_id()::_v"},
"_ZZN11rocprofiler8internal18correlation_config22get_unique_internal_idEvE2_v",
"rocprofiler::internal::correlation_config::get_unique_internal_id()::_v"},
strview_pair_t{"_ZZN11rocprofiler8internal18get_active_configsEvE2_v",
"rocprofiler::internal::get_active_configs()::_v"},
strview_pair_t{"_ZZN11rocprofiler8internal22get_registered_configsEvE2_v",