diff --git a/CMakeLists.txt b/CMakeLists.txt index 049d09723a..afd86e4281 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,7 @@ string(REGEX REPLACE "([0-9]+)\.([0-9]+)\.([0-9]+)(.*)" "\\1.\\2.\\3" ROCPROFILE foreach(_LANG C CXX) set(CMAKE_${_LANG}_FLAGS_COVERAGE_INIT - "-Og -g3 -fno-omit-frame-pointer -fprofile-abs-path -fprofile-arcs -ftest-coverage --coverage" + "-Og -g3 -fno-omit-frame-pointer -fprofile-abs-path -fprofile-arcs -ftest-coverage --coverage -DCODECOV=1" CACHE STRING "${_LANG} flags for code coverage builds") set(CMAKE_${_LANG}_FLAGS_COVERAGE "${CMAKE_${_LANG}_FLAGS_COVERAGE_INIT}" diff --git a/samples/api_buffered_tracing/client.cpp b/samples/api_buffered_tracing/client.cpp index 3e5420f452..0bef606f76 100644 --- a/samples/api_buffered_tracing/client.cpp +++ b/samples/api_buffered_tracing/client.cpp @@ -211,10 +211,10 @@ tool_tracing_callback(rocprofiler_context_id_t context, static_cast(header->payload); auto info = std::stringstream{}; info << "tid=" << record->thread_id << ", context=" << context.handle - << ", buffer_id=" << buffer_id.handle << ", cid=" << record->correlation_id.id - << ", kind=" << record->kind << ", operation=" << record->operation - << ", drop_count=" << drop_count << ", start=" << record->start_timestamp - << ", stop=" << record->end_timestamp; + << ", buffer_id=" << buffer_id.handle + << ", cid=" << record->correlation_id.internal << ", kind=" << record->kind + << ", operation=" << record->operation << ", drop_count=" << drop_count + << ", start=" << record->start_timestamp << ", stop=" << record->end_timestamp; if(record->start_timestamp > record->end_timestamp) throw std::runtime_error("start > end"); diff --git a/samples/api_callback_tracing/client.cpp b/samples/api_callback_tracing/client.cpp index 957c9ecd21..33fa74820f 100644 --- a/samples/api_callback_tracing/client.cpp +++ b/samples/api_callback_tracing/client.cpp @@ -167,12 +167,14 @@ store_callback_id_names(call_stack_t* tool_data) } void 
-tool_tracing_callback(rocprofiler_callback_tracing_record_t record, void* user_data) +tool_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t*, + void* user_data) { assert(user_data != nullptr); auto info = std::stringstream{}; - info << "tid=" << record.thread_id << ", cid=" << record.correlation_id.id + info << "tid=" << record.thread_id << ", cid=" << record.correlation_id.internal << ", kind=" << record.kind << ", operation=" << record.operation << ", phase=" << record.phase; diff --git a/samples/counter_collection/client.cpp b/samples/counter_collection/client.cpp index 07cc8da493..903956be44 100644 --- a/samples/counter_collection/client.cpp +++ b/samples/counter_collection/client.cpp @@ -44,7 +44,7 @@ test_callback(rocprofiler_queue_id_t queue_id, { // Callback containing counter data. std::clog << "[" << __FUNCTION__ << "] " << queue_id.handle << " | " << agent_id.id.handle - << " | " << corr_id.id << "\n"; + << " | " << corr_id.internal << "\n"; } int diff --git a/source/include/rocprofiler/buffer_tracing.h b/source/include/rocprofiler/buffer_tracing.h index 05df9c4af1..0e3fbb000d 100644 --- a/source/include/rocprofiler/buffer_tracing.h +++ b/source/include/rocprofiler/buffer_tracing.h @@ -200,8 +200,7 @@ typedef struct { rocprofiler_service_buffer_tracing_kind_t kind; rocprofiler_correlation_id_t correlation_id; - rocprofiler_external_correlation_id_t external_correlation_id; -} rocprofiler_buffer_tracing_external_correlation_record_t; +} rocprofiler_buffer_tracing_correlation_record_t; /** * @brief Callback function for mapping @ref rocprofiler_service_buffer_tracing_kind_t ids to diff --git a/source/include/rocprofiler/callback_tracing.h b/source/include/rocprofiler/callback_tracing.h index 56828397a6..98bc570d3c 100644 --- a/source/include/rocprofiler/callback_tracing.h +++ b/source/include/rocprofiler/callback_tracing.h @@ -132,10 +132,26 @@ typedef struct } 
rocprofiler_callback_tracer_code_object_register_host_kernel_symbol_data_t; /** - * @brief API Tracing callback function. + * @brief API Tracing callback function. This function is invoked twice per API function: once + * before the function is invoked and once after the function is invoked. The external correlation + * id value within the record is assigned the value at the top of the external correlation id stack. + * It is permissible to invoke @ref rocprofiler_push_external_correlation_id within the enter phase; + * when a new external correlation id is pushed during the enter phase, rocprofiler will use that + * external correlation id for any async events and provide the new external correlation id during + * the exit callback. In other words, pushing a new external correlation id within the enter + * callback will result in that external correlation id value in the exit callback (which may or may + * not be different from the external correlation id value in the enter callback). If a tool pushes + * new external correlation ids in the enter phase, it is recommended to pop the external + * correlation id in the exit callback. + * + * @param record [in] Callback record data + * @param user_data [in,out] This parameter can be used to retain information in between the enter + * and exit phases. + * @param callback_data [in] User data provided when configuring the callback tracing service */ typedef void (*rocprofiler_callback_tracing_cb_t)(rocprofiler_callback_tracing_record_t record, - void* user_data); + rocprofiler_user_data_t* user_data, + void* callback_data) ROCPROFILER_NONNULL(2); /** * @brief Callback function for mapping @ref rocprofiler_service_callback_tracing_kind_t ids to @@ -180,15 +196,31 @@ typedef int (*rocprofiler_callback_tracing_operation_args_cb_t)( void* data); /** - * @brief Configure Callback Tracing Service. + * @brief Configure Callback Tracing Service. 
The callback tracing service provides two synchronous + * callbacks around an API function on the same thread as the application which is invoking the API + * function. This function can only be invoked once per @ref + * rocprofiler_service_callback_tracing_kind_t value, i.e. it can be invoked once for the HSA API, + * once for the HIP API, and so on but it will fail if it is invoked for the HSA API twice. Please + * note, the callback API does have the potentially non-trivial overhead of copying the function + * arguments into the record. If you are willing to let rocprofiler record the timestamps, do not + * require synchronous notifications of the API calls, and want the lowest possible overhead, use + * the @see BUFFER_TRACING_SERVICE. * - * @param [in] context_id - * @param [in] kind - * @param [in] operations - * @param [in] operations_count - * @param [in] callback - * @param [in] callback_args - * @return ::rocprofiler_status_t + * @param [in] context_id Context to associate the service with + * @param [in] kind The domain of the callback tracing service + * @param [in] operations Array of operations in the domain (i.e. enum values which identify + * specific API functions). If this is null, all API functions in the domain will be traced + * @param [in] operations_count If the operations array is non-null, set this to the size of the + * array. + * @param [in] callback The function to invoke before and after an API function + * @param [in] callback_args Data provided to every invocation of the callback function + * @return ::rocprofiler_status_t Will return @ref ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED if + * invoked outside of the initialization function in @ref rocprofiler_tool_configure_result_t + * provided to rocprofiler via @ref rocprofiler_configure function. Will return @ref + * ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND if the provided context is not valid/registered. 
Will + * return @ref ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGURED if the same @ref + * rocprofiler_service_callback_tracing_kind_t value is provided more than once (per context) -- in + * other words, we do not support overriding or combining the operations in separate function calls. * */ rocprofiler_status_t ROCPROFILER_API diff --git a/source/include/rocprofiler/external_correlation.h b/source/include/rocprofiler/external_correlation.h index bb41061d7d..9c3034d5b4 100644 --- a/source/include/rocprofiler/external_correlation.h +++ b/source/include/rocprofiler/external_correlation.h @@ -37,23 +37,32 @@ ROCPROFILER_EXTERN_C_INIT /** @} */ /** - * @brief ROCProfiler Push External Correlation ID. + * @brief Push default value for `external` field in @ref rocprofiler_correlation_id_t onto stack. * - * @param external_correlation_id - * @return rocprofiler_status_t + * @param context [in] Associated context + * @param tid [in] thread identifier. @see rocprofiler_get_thread_id + * @param external_correlation_id [in] User data to place in external field in @ref + * rocprofiler_correlation_id_t + * @return rocprofiler_status_t Returns ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND if the context + * does not exist. Returns ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT if thread id is not valid. */ rocprofiler_status_t ROCPROFILER_API -rocprofiler_push_external_correlation_id( - rocprofiler_external_correlation_id_t external_correlation_id); +rocprofiler_push_external_correlation_id(rocprofiler_context_id_t context, + rocprofiler_thread_id_t tid, + rocprofiler_user_data_t external_correlation_id); /** - * @brief ROCProfiler Push External Correlation ID. + * @brief Pop default value for `external` field in @ref rocprofiler_correlation_id_t off of stack * - * @param external_correlation_id - * @return rocprofiler_status_t + * @param context [in] Associated context + * @param tid [in] thread identifier. 
@see rocprofiler_get_thread_id + * @param external_correlation_id [out] Correlation id data popped off the stack + * @return rocprofiler_status_t Returns ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND if the context + * does not exist. Returns ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT if thread id is not valid. */ rocprofiler_status_t ROCPROFILER_API -rocprofiler_pop_external_correlation_id( - rocprofiler_external_correlation_id_t* external_correlation_id); +rocprofiler_pop_external_correlation_id(rocprofiler_context_id_t context, + rocprofiler_thread_id_t tid, + rocprofiler_user_data_t* external_correlation_id); ROCPROFILER_EXTERN_C_FINI diff --git a/source/include/rocprofiler/fwd.h b/source/include/rocprofiler/fwd.h index 94e0e40fdc..f7ee42866c 100644 --- a/source/include/rocprofiler/fwd.h +++ b/source/include/rocprofiler/fwd.h @@ -54,6 +54,8 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_STATUS_ERROR_DOMAIN_NOT_FOUND, ///< Domain identifier is invalid ROCPROFILER_STATUS_ERROR_OPERATION_NOT_FOUND, ///< Operation identifier is invalid for domain ROCPROFILER_STATUS_ERROR_THREAD_NOT_FOUND, ///< No valid thread for given thread id + ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND, ///< Agent identifier not found + ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND, ///< Counter identifier does not exist ROCPROFILER_STATUS_ERROR_CONTEXT_ERROR, ///> Generalized context error ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID, ///< Context configuration is not valid ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_STARTED, ///< Context was not started (maybe already @@ -69,8 +71,8 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED, ///< Function is not implemented ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI, ///< Data structure provided by user is incompatible ///< with current version of rocprofiler - ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND, ///< Agent not found - ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND, ///< Counter does not exist + 
ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT, ///< Function invoked with one or more invalid + ///< arguments ROCPROFILER_STATUS_LAST, } rocprofiler_status_t; @@ -282,17 +284,10 @@ typedef struct */ typedef struct { - uint64_t id; + uint64_t internal; + rocprofiler_user_data_t external; } rocprofiler_correlation_id_t; -/** - * @brief ROCProfiler External Correlation ID. - */ -typedef struct -{ - uint64_t id; -} rocprofiler_external_correlation_id_t; - /** * @struct rocprofiler_buffer_id_t * @brief Buffer ID. @@ -332,13 +327,12 @@ typedef struct */ typedef struct rocprofiler_callback_tracing_record_t { + rocprofiler_context_id_t context_id; rocprofiler_thread_id_t thread_id; rocprofiler_correlation_id_t correlation_id; - rocprofiler_external_correlation_id_t external_correlation_id; rocprofiler_service_callback_tracing_kind_t kind; uint32_t operation; rocprofiler_service_callback_phase_t phase; - rocprofiler_user_data_t data; void* payload; } rocprofiler_callback_tracing_record_t; diff --git a/source/include/rocprofiler/rocprofiler.h b/source/include/rocprofiler/rocprofiler.h index 261572c701..7fec25a63d 100644 --- a/source/include/rocprofiler/rocprofiler.h +++ b/source/include/rocprofiler/rocprofiler.h @@ -97,6 +97,32 @@ ROCPROFILER_EXTERN_C_INIT rocprofiler_status_t rocprofiler_get_timestamp(rocprofiler_timestamp_t* ts) ROCPROFILER_API ROCPROFILER_NONNULL(1); +/** + * @fn rocprofiler_status_t rocprofiler_get_thread_id(rocprofiler_thread_id_t* tid) + * @brief Get the identifier value of the current thread that is used by rocprofiler + * @param [out] tid Output address of the rocprofiler thread id value + */ +rocprofiler_status_t +rocprofiler_get_thread_id(rocprofiler_thread_id_t* tid) ROCPROFILER_API ROCPROFILER_NONNULL(1); + +/** + * @fn const char* rocprofiler_get_status_name(rocprofiler_status_t status) + * @brief Return the string encoding of @ref rocprofiler_status_t value + * @param [in] status error code value + * @return Will return a nullptr if 
invalid/unsupported @ref rocprofiler_status_t value is provided. + */ +const char* +rocprofiler_get_status_name(rocprofiler_status_t status) ROCPROFILER_API; + +/** + * @fn const char* rocprofiler_get_status_string(rocprofiler_status_t status) + * @brief Return the message associated with @ref rocprofiler_status_t value + * @param [in] status error code value + * @return Will return a nullptr if invalid/unsupported @ref rocprofiler_status_t value is provided. + */ +const char* +rocprofiler_get_status_string(rocprofiler_status_t status) ROCPROFILER_API; + /** @} */ ROCPROFILER_EXTERN_C_FINI diff --git a/source/lib/common/synchronized.hpp b/source/lib/common/synchronized.hpp index e688ad89cc..cd9c49250d 100644 --- a/source/lib/common/synchronized.hpp +++ b/source/lib/common/synchronized.hpp @@ -26,6 +26,7 @@ #include #include #include +#include namespace rocprofiler { @@ -52,45 +53,89 @@ namespace common * // set data to new value * }); */ -template +template class Synchronized { public: - Synchronized() = default; - Synchronized(LockedType&& data) - : data_(std::move(data)) + using value_type = LockedType; + using this_type = Synchronized; + + Synchronized() = default; + ~Synchronized() = default; + + explicit Synchronized(value_type&& data) + : m_data{std::move(data)} {} + + Synchronized(Synchronized&& data) noexcept = default; + Synchronized& operator=(Synchronized&& data) noexcept = default; + // Do not allow this data structure to be copied, std::move only. Synchronized(const Synchronized&) = delete; + Synchronized& operator=(const Synchronized&) = delete; - void rlock(std::function lambda) const + template + auto rlock(FuncT&& lambda, Args&&... 
args) const { - std::shared_lock lock(mutex_); - lambda(data_); + static_assert(std::is_invocable::value, + "function must accept const reference to locked type"); + + auto lock = std::shared_lock{m_mutex}; + return std::forward(lambda)(m_data, std::forward(args)...); } - void wlock(std::function lambda) + template + auto wlock(FuncT&& lambda, Args&&... args) { - std::unique_lock lock(mutex_); - lambda(data_); + static_assert(std::is_invocable::value, + "function must accept reference to locked type"); + + auto lock = std::unique_lock{m_mutex}; + return std::forward(lambda)(m_data, std::forward(args)...); + } + + // This overload to wlock allows a synchronized map whose keys map to synchronized data to + // use a read lock on the key data and then a write lock on the mapped data. + template = 0> + auto wlock(FuncT&& lambda, Args&&... args) const + { + return const_cast(this)->wlock(std::forward(lambda), + std::forward(args)...); } // Upgradable lock. If read returns false, write will be called with a unique_lock. // Essentially a helper function that does .rlock() followed by .wlock(). - void ulock(std::function read, std::function write) + template + bool ulock(ReadFuncT&& read, WriteFuncT&& write, Args&&... 
args) { + static_assert(std::is_invocable::value, + "read function must accept const reference to locked type"); + static_assert(std::is_invocable::value, + "write function must accept reference to locked type"); + + using read_return_type = std::invoke_result_t; + using write_return_type = std::invoke_result_t; + + static_assert(std::is_same::value, + "read and write functions must return same type"); + static_assert(std::is_same::value, + "read/write functions must return bool"); + { - std::shared_lock lock(mutex_); - if(read(data_)) return; + auto lock = std::shared_lock{m_mutex}; + if(read(m_data, std::forward(args)...)) return true; } - std::unique_lock lock(mutex_); - write(data_); + auto lock = std::unique_lock{m_mutex}; + return write(m_data, std::forward(args)...); } private: - mutable std::shared_mutex mutex_; - LockedType data_; + mutable std::shared_mutex m_mutex = {}; + value_type m_data = {}; }; } // namespace common } // namespace rocprofiler diff --git a/source/lib/rocprofiler/CMakeLists.txt b/source/lib/rocprofiler/CMakeLists.txt index 2dba956d81..bad117589e 100644 --- a/source/lib/rocprofiler/CMakeLists.txt +++ b/source/lib/rocprofiler/CMakeLists.txt @@ -3,7 +3,8 @@ # rocprofiler_activate_clang_tidy() -set(ROCPROFILER_LIB_HEADERS buffer.hpp internal_threading.hpp registration.hpp) +set(ROCPROFILER_LIB_HEADERS buffer.hpp external_correlation.hpp internal_threading.hpp + registration.hpp) set(ROCPROFILER_LIB_SOURCES agent.cpp buffer.cpp @@ -12,6 +13,7 @@ set(ROCPROFILER_LIB_SOURCES context.cpp counters.cpp dispatch_profile.cpp + external_correlation.cpp internal_threading.cpp pc_sampling.cpp profile_config.cpp diff --git a/source/lib/rocprofiler/context.cpp b/source/lib/rocprofiler/context.cpp index 6aafea1cfa..2c93fd0668 100644 --- a/source/lib/rocprofiler/context.cpp +++ b/source/lib/rocprofiler/context.cpp @@ -65,7 +65,7 @@ rocprofiler_context_is_active(rocprofiler_context_id_t context_id, int* status) *status = 0; for(const auto& itr : 
rocprofiler::context::get_active_contexts()) { - auto* cfg = itr.load(std::memory_order_relaxed); + const auto* cfg = itr.load(std::memory_order_relaxed); if(cfg && cfg->context_idx == context_id.handle) { *status = 1; diff --git a/source/lib/rocprofiler/context/context.cpp b/source/lib/rocprofiler/context/context.cpp index 7fd4b0a8d0..21dd3b14de 100644 --- a/source/lib/rocprofiler/context/context.cpp +++ b/source/lib/rocprofiler/context/context.cpp @@ -61,7 +61,7 @@ get_client_index() } // namespace uint64_t -correlation_tracing_service::get_unique_record_id() +correlation_tracing_service::get_unique_internal_id() { static auto _v = std::atomic{}; return _v++; @@ -181,7 +181,7 @@ start_context(rocprofiler_context_id_t context_id) // try to find a nullptr slot first for(size_t i = 0; i < get_active_contexts().size(); ++i) { - auto* itr = get_active_contexts().at(i).load(std::memory_order_relaxed); + const auto* itr = get_active_contexts().at(i).load(std::memory_order_relaxed); if(itr == nullptr) { idx = i; @@ -201,8 +201,8 @@ start_context(rocprofiler_context_id_t context_id) } // atomic swap the pointer into the "active" array used internally - context* _expected = nullptr; - bool success = get_active_contexts().at(idx).compare_exchange_strong( + const context* _expected = nullptr; + bool success = get_active_contexts().at(idx).compare_exchange_strong( _expected, get_registered_contexts().at(context_id.handle).get()); if(!success) return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_STARTED; @@ -219,7 +219,7 @@ stop_context(rocprofiler_context_id_t idx) // callbacks for(auto& itr : get_active_contexts()) { - auto* _expected = itr.load(std::memory_order_relaxed); + const auto* _expected = itr.load(std::memory_order_relaxed); if(_expected && _expected->context_idx == idx.handle) { bool success = itr.compare_exchange_strong(_expected, nullptr); @@ -237,7 +237,7 @@ deactivate_client_contexts(rocprofiler_client_id_t client_id) { for(auto& itr : get_active_contexts()) { - auto* 
itr_v = itr.load(); + const auto* itr_v = itr.load(); if(itr_v->client_idx == client_id.handle) { itr.store(nullptr); diff --git a/source/lib/rocprofiler/context/context.hpp b/source/lib/rocprofiler/context/context.hpp index feb8c9503d..b677ad8b8d 100644 --- a/source/lib/rocprofiler/context/context.hpp +++ b/source/lib/rocprofiler/context/context.hpp @@ -27,8 +27,10 @@ #include #include "lib/common/container/stable_vector.hpp" +#include "lib/common/synchronized.hpp" #include "lib/rocprofiler/context/domain.hpp" #include "lib/rocprofiler/counters/core.hpp" +#include "lib/rocprofiler/external_correlation.hpp" #include #include @@ -48,11 +50,8 @@ using external_cid_cb_t = uint64_t (*)(rocprofiler_service_callback_tracing_kind /// the rocprofiler generated correlation id struct correlation_tracing_service { - uint64_t id = 0; - uint64_t external_id = 0; - external_cid_cb_t external_id_callback = nullptr; - - static uint64_t get_unique_record_id(); + external_correlation::external_correlation external_correlator = {}; + static uint64_t get_unique_internal_id(); }; struct callback_tracing_service @@ -85,11 +84,11 @@ struct counter_collection_service // Each instance is assocated with an agent and a counter collection profile. // Contains callback information along with other data needed to collect/process // counters. - std::vector> callbacks{}; + std::vector> callbacks{}; // A flag to state wether or not the counter set is currently enabled. This is primarily // to protect against multithreaded calls to enable a context (and enabling already enabled // counters). 
- rocprofiler::common::Synchronized enabled{false}; + common::Synchronized enabled{false}; }; struct context @@ -135,7 +134,7 @@ start_context(rocprofiler_context_id_t id); rocprofiler_status_t stop_context(rocprofiler_context_id_t); using unique_context_vec_t = common::container::stable_vector, 8>; -using active_context_vec_t = common::container::stable_vector, 8>; +using active_context_vec_t = common::container::stable_vector, 8>; unique_context_vec_t& get_registered_contexts(); diff --git a/source/lib/rocprofiler/external_correlation.cpp b/source/lib/rocprofiler/external_correlation.cpp new file mode 100644 index 0000000000..41c7089fd0 --- /dev/null +++ b/source/lib/rocprofiler/external_correlation.cpp @@ -0,0 +1,150 @@ +// MIT License +// +// Copyright (c) 2023 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include +#include + +#include "lib/common/synchronized.hpp" +#include "lib/rocprofiler/context/context.hpp" +#include "lib/rocprofiler/external_correlation.hpp" + +#include + +namespace rocprofiler +{ +namespace external_correlation +{ +rocprofiler_user_data_t +external_correlation::get(rocprofiler_thread_id_t tid) const +{ + static constexpr auto empty_user_data = rocprofiler_user_data_t{.value = 0}; + + return data.rlock( + [](const external_correlation_map_t& _data, rocprofiler_thread_id_t tid_v) { + if(_data.count(tid_v) == 0) return empty_user_data; + const auto& itr = _data.at(tid_v); + return itr.rlock([](const external_correlation_stack_t& data_stack) { + if(data_stack.empty()) return empty_user_data; + return data_stack.back(); + }); + }, + tid); +} + +void +external_correlation::push(rocprofiler_thread_id_t tid, rocprofiler_user_data_t user_data) +{ + // ensure that data contains key for provided thread id + while(!data.ulock( + [](const external_correlation_map_t& _data, rocprofiler_thread_id_t tid_v) { + return (_data.find(tid_v) != _data.end()); + }, + [](external_correlation_map_t& _data, rocprofiler_thread_id_t tid_v) { + _data.emplace(tid_v, external_correlation_stack_t{}); + return true; + }, + tid)) + {} + + // since we know from above that there will be a key for the tid, we start with a read + // lock and then once we have the mapped data for the key, we leverage the enabling + // of the wlock const overload to remove the constness and use a write lock. 
If we were to use a + // write lock at the top level, then we would unnecessarily block other threads from writing to + // the stack of another thread + data.rlock( + [](const external_correlation_map_t& _data, + rocprofiler_thread_id_t tid_v, + rocprofiler_user_data_t user_data_v) { + const auto& itr = _data.at(tid_v); + itr.wlock([](external_correlation_stack_t& data_stack, + rocprofiler_user_data_t value) { data_stack.emplace_back(value); }, + user_data_v); + }, + tid, + user_data); +} + +rocprofiler_user_data_t +external_correlation::pop(rocprofiler_thread_id_t tid) +{ + static constexpr auto empty_user_data = rocprofiler_user_data_t{.value = 0}; + + return data.wlock( + [](external_correlation_map_t& _data, rocprofiler_thread_id_t tid_v) { + if(_data.count(tid_v) == 0) return empty_user_data; + auto& itr = _data.at(tid_v); + return itr.wlock([](external_correlation_stack_t& data_stack) { + if(data_stack.empty()) return empty_user_data; + auto ret = data_stack.back(); + data_stack.pop_back(); + return ret; + }); + }, + tid); +} +} // namespace external_correlation +} // namespace rocprofiler + +extern "C" { +rocprofiler_status_t +rocprofiler_push_external_correlation_id(rocprofiler_context_id_t context, + rocprofiler_thread_id_t tid, + rocprofiler_user_data_t external_correlation_id) +{ + // assumption is that thread ids are monotonically increasing from the pid + static uint64_t pid_v = getpid(); + if(tid < pid_v) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; + + for(auto& itr : rocprofiler::context::get_registered_contexts()) + { + if(itr->context_idx == context.handle) + { + itr->correlation_tracer.external_correlator.push(tid, external_correlation_id); + return ROCPROFILER_STATUS_SUCCESS; + } + } + + return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; +} + +rocprofiler_status_t +rocprofiler_pop_external_correlation_id(rocprofiler_context_id_t context, + rocprofiler_thread_id_t tid, + rocprofiler_user_data_t* external_correlation_id) +{ + // assumption is 
that thread ids are monotonically increasing from the pid + static uint64_t pid_v = getpid(); + if(tid < pid_v) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT; + + for(auto& itr : rocprofiler::context::get_registered_contexts()) + { + if(itr->context_idx == context.handle) + { + auto former = itr->correlation_tracer.external_correlator.pop(tid); + if(external_correlation_id) *external_correlation_id = former; + return ROCPROFILER_STATUS_SUCCESS; + } + } + + return ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND; +} +} diff --git a/source/lib/rocprofiler/external_correlation.hpp b/source/lib/rocprofiler/external_correlation.hpp new file mode 100644 index 0000000000..30f8d4b940 --- /dev/null +++ b/source/lib/rocprofiler/external_correlation.hpp @@ -0,0 +1,61 @@ +// MIT License +// +// Copyright (c) 2023 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#pragma once + +#include +#include + +#include "lib/common/defines.hpp" +#include "lib/common/synchronized.hpp" +#include "lib/common/utility.hpp" + +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace external_correlation +{ +static constexpr bool enable_const_wlock_v = true; + +using external_correlation_stack_t = std::vector; + +// we enable the wlock(...) const for the mapped type so that we can use wlock on the mapped type +// within a rlock of the external correlation map +using external_correlation_map_t = + std::unordered_map>; + +struct external_correlation +{ + rocprofiler_user_data_t get(rocprofiler_thread_id_t) const; + void push(rocprofiler_thread_id_t, rocprofiler_user_data_t); + rocprofiler_user_data_t pop(rocprofiler_thread_id_t); + +private: + common::Synchronized data = {}; +}; +} // namespace external_correlation +} // namespace rocprofiler diff --git a/source/lib/rocprofiler/hsa/agent_cache.cpp b/source/lib/rocprofiler/hsa/agent_cache.cpp index 31272b2c7f..721a77e899 100644 --- a/source/lib/rocprofiler/hsa/agent_cache.cpp +++ b/source/lib/rocprofiler/hsa/agent_cache.cpp @@ -29,7 +29,7 @@ #include "lib/common/utility.hpp" // For Pre-ROCm 6.0 releases -#ifndef HSA_AMD_AGENT_INFO_NEAREST_CPU +#if ROCPROFILER_HSA_RUNTIME_VERSION <= 100900 # define HSA_AMD_AGENT_INFO_NEAREST_CPU 0xA113 #endif @@ -213,4 +213,4 @@ AgentCache::AgentCache(rocprofiler_agent_t agent_t, } } // namespace hsa -} // namespace rocprofiler \ No newline at end of file +} // namespace rocprofiler diff --git a/source/lib/rocprofiler/hsa/hsa.cpp b/source/lib/rocprofiler/hsa/hsa.cpp index 11494d3753..4bb3d4c7d6 100644 --- a/source/lib/rocprofiler/hsa/hsa.cpp +++ b/source/lib/rocprofiler/hsa/hsa.cpp @@ -179,20 +179,23 @@ hsa_api_impl::functor(Args&&... 
args) struct callback_context_data { - context::context* ctx = nullptr; - rocprofiler_callback_tracing_record_t record = {}; + const context::context* ctx = nullptr; + rocprofiler_callback_tracing_record_t record = {}; + rocprofiler_user_data_t user_data = {.value = 0}; }; struct buffered_context_data { - context::context* ctx = nullptr; + const context::context* ctx = nullptr; + rocprofiler_user_data_t external_correlation = {}; }; + auto thr_id = common::get_tid(); auto callback_contexts = std::vector{}; auto buffered_contexts = std::vector{}; for(const auto& aitr : context::get_active_contexts()) { - auto* itr = aitr.load(); + const auto* itr = aitr.load(); if(!itr) continue; if(itr->callback_tracer) @@ -209,7 +212,8 @@ hsa_api_impl::functor(Args&&... args) // if the given domain + op is not enabled, skip this context if(itr->buffered_tracer->domains(info_type::buffered_domain_idx, info_type::operation_idx)) - buffered_contexts.emplace_back(buffered_context_data{itr}); + buffered_contexts.emplace_back(buffered_context_data{ + itr, itr->correlation_tracer.external_correlator.get(thr_id)}); } } @@ -222,19 +226,21 @@ hsa_api_impl::functor(Args&&... 
args) return HSA_STATUS_SUCCESS; } - auto buffer_record = rocprofiler_buffer_tracing_hsa_api_record_t{}; - auto tracer_data = rocprofiler_hsa_api_callback_tracer_data_t{}; - auto corr_id = context::correlation_tracing_service::get_unique_record_id(); - auto thr_id = common::get_tid(); + constexpr auto empty_user_data = rocprofiler_user_data_t{.value = 0}; + auto buffer_record = rocprofiler_buffer_tracing_hsa_api_record_t{}; + auto tracer_data = rocprofiler_hsa_api_callback_tracer_data_t{}; + auto internal_corr_id = context::correlation_tracing_service::get_unique_internal_id(); // construct the buffered info before the callback so the callbacks are as closely wrapped // around the function call as possible if(!buffered_contexts.empty()) { - buffer_record.kind = info_type::buffered_domain_idx; - buffer_record.correlation_id = rocprofiler_correlation_id_t{corr_id}; - buffer_record.operation = info_type::operation_idx; - buffer_record.thread_id = thr_id; + buffer_record.kind = info_type::buffered_domain_idx; + // external correlation will be updated right before record is placed in buffer + buffer_record.correlation_id = + rocprofiler_correlation_id_t{internal_corr_id, empty_user_data}; + buffer_record.operation = info_type::operation_idx; + buffer_record.thread_id = thr_id; } // invoke the callbacks @@ -245,38 +251,39 @@ hsa_api_impl::functor(Args&&... 
args) for(auto& itr : callback_contexts) { - auto& ctx = itr.ctx; - auto& record = itr.record; + auto& ctx = itr.ctx; + auto& record = itr.record; + auto& user_data = itr.user_data; - uint64_t extern_corr_id = 0; - auto& _correlation = ctx->correlation_tracer; - if(_correlation.external_id_callback) - { - _correlation.external_id = _correlation.external_id_callback( - info_type::callback_domain_idx, info_type::operation_idx, corr_id); - extern_corr_id = _correlation.external_id; - } - auto user_data = rocprofiler_user_data_t{.value = 0}; - - record = rocprofiler_callback_tracing_record_t{ - thr_id, - rocprofiler_correlation_id_t{corr_id}, - rocprofiler_external_correlation_id_t{extern_corr_id}, - info_type::callback_domain_idx, - info_type::operation_idx, - ROCPROFILER_SERVICE_CALLBACK_PHASE_ENTER, - user_data, - static_cast(&tracer_data)}; + auto corr_id = rocprofiler_correlation_id_t{ + internal_corr_id, ctx->correlation_tracer.external_correlator.get(thr_id)}; + record = + rocprofiler_callback_tracing_record_t{rocprofiler_context_id_t{ctx->context_idx}, + thr_id, + corr_id, + info_type::callback_domain_idx, + info_type::operation_idx, + ROCPROFILER_SERVICE_CALLBACK_PHASE_ENTER, + static_cast(&tracer_data)}; auto& callback_info = ctx->callback_tracer->callback_data.at(info_type::callback_domain_idx); - callback_info.callback(record, callback_info.data); + callback_info.callback(record, &user_data, callback_info.data); + + // enter callback may update the external correlation id field + record.correlation_id.external = + ctx->correlation_tracer.external_correlator.get(thr_id); } } // record the start timestamp as close to the function call as possible if(!buffered_contexts.empty()) { + for(auto& itr : buffered_contexts) + { + itr.external_correlation = itr.ctx->correlation_tracer.external_correlator.get(thr_id); + } + buffer_record.start_timestamp = common::timestamp_ns(); } @@ -294,15 +301,16 @@ hsa_api_impl::functor(Args&&... 
args) for(auto& itr : callback_contexts) { - auto& ctx = itr.ctx; - auto& record = itr.record; + auto& ctx = itr.ctx; + auto& record = itr.record; + auto& user_data = itr.user_data; record.phase = ROCPROFILER_SERVICE_CALLBACK_PHASE_EXIT; record.payload = static_cast(&tracer_data); auto& callback_info = ctx->callback_tracer->callback_data.at(info_type::callback_domain_idx); - callback_info.callback(record, callback_info.data); + callback_info.callback(record, &user_data, callback_info.data); } } @@ -318,6 +326,11 @@ hsa_api_impl::functor(Args&&... args) if(bitr && bitr->context_id == itr.ctx->context_idx && bitr->buffer_id == buffer_id.handle) { + // make copy of record + auto record_v = buffer_record; + // update the copy with the external correlation captured for this context + record_v.correlation_id.external = itr.external_correlation; + bitr->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING, info_type::buffered_domain_idx, - buffer_record); + record_v); diff --git a/source/lib/rocprofiler/rocprofiler.cpp b/source/lib/rocprofiler/rocprofiler.cpp index 7bc548ea6f..221b825497 100644 --- a/source/lib/rocprofiler/rocprofiler.cpp +++ b/source/lib/rocprofiler/rocprofiler.cpp @@ -25,6 +25,77 @@ #include "lib/common/utility.hpp" +namespace rocprofiler +{ +namespace +{ +#define ROCPROFILER_STATUS_STRING(CODE, MSG) \ + template <> \ + struct status_string \ + { \ + static constexpr auto name = #CODE; \ + static constexpr auto value = MSG; \ + }; + +template +struct status_string; + +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_SUCCESS, "Success") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR, "General error") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_FOUND, "Context ID not found") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND, "Buffer ID not found") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_DOMAIN_NOT_FOUND, "Domain ID not found") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_OPERATION_NOT_FOUND, "Operation ID not found") 
+ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_THREAD_NOT_FOUND, "Thread ID not found") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_CONTEXT_ERROR, "General context error") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND, "Agent ID not found") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND, "HW counter not found") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_CONTEXT_INVALID, + "Context configuration is not valid") +ROCPROFILER_STATUS_STRING( + ROCPROFILER_STATUS_ERROR_CONTEXT_NOT_STARTED, + "Context failed to be started (may be already started or atomic swap may have failed)") +ROCPROFILER_STATUS_STRING( + ROCPROFILER_STATUS_ERROR_BUFFER_BUSY, + "Buffer operation failed because it is currently busy handling another request") +ROCPROFILER_STATUS_STRING( + ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGURED, + "Service configuration request would overwrite existing service configuration values") +ROCPROFILER_STATUS_STRING( + ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED, + "Configuration request occurred outside of valid rocprofiler configuration period") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED, + "API function is defined but not implemented") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_ABI, + "Data structure provided by user has a incompatible binary interface " + "with this version of rocprofiler") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT, + "Function invoked with one or more invalid arguments") + +template +const char* +get_status_name(rocprofiler_status_t status, std::index_sequence) +{ + if(status == Idx) return status_string::name; + // recursion until tail empty + if constexpr(sizeof...(Tail) > 0) + return get_status_name(status, std::index_sequence{}); + return nullptr; +} + +template +const char* +get_status_string(rocprofiler_status_t status, std::index_sequence) +{ + if(status == Idx) return status_string::value; + // 
recursion until tail empty + if constexpr(sizeof...(Tail) > 0) + return get_status_string(status, std::index_sequence{}); + return nullptr; +} +} // namespace +} // namespace rocprofiler + extern "C" { rocprofiler_status_t rocprofiler_get_version(uint32_t* major, uint32_t* minor, uint32_t* patch) @@ -41,4 +112,25 @@ rocprofiler_get_timestamp(rocprofiler_timestamp_t* ts) *ts = rocprofiler::common::timestamp_ns(); return ROCPROFILER_STATUS_SUCCESS; } + +rocprofiler_status_t +rocprofiler_get_thread_id(rocprofiler_thread_id_t* tid) +{ + *tid = rocprofiler::common::get_tid(); + return ROCPROFILER_STATUS_SUCCESS; +} + +const char* +rocprofiler_get_status_name(rocprofiler_status_t status) +{ + return rocprofiler::get_status_name(status, + std::make_index_sequence{}); +} + +const char* +rocprofiler_get_status_string(rocprofiler_status_t status) +{ + return rocprofiler::get_status_string(status, + std::make_index_sequence{}); +} } diff --git a/source/lib/rocprofiler/tests/CMakeLists.txt b/source/lib/rocprofiler/tests/CMakeLists.txt index 11a967af91..13e7d38980 100644 --- a/source/lib/rocprofiler/tests/CMakeLists.txt +++ b/source/lib/rocprofiler/tests/CMakeLists.txt @@ -34,7 +34,7 @@ set_tests_properties(${lib_TESTS} PROPERTIES TIMEOUT 45 LABELS "unittests") # # -------------------------------------------------------------------------------------- # -set(rocprofiler_shared_lib_sources registration.cpp) +set(rocprofiler_shared_lib_sources external_correlation.cpp registration.cpp status.cpp) add_executable(rocprofiler-lib-tests-shared) target_sources(rocprofiler-lib-tests-shared PRIVATE ${rocprofiler_shared_lib_sources}) diff --git a/source/lib/rocprofiler/tests/external_correlation.cpp b/source/lib/rocprofiler/tests/external_correlation.cpp new file mode 100644 index 0000000000..fc9f8e5a4a --- /dev/null +++ b/source/lib/rocprofiler/tests/external_correlation.cpp @@ -0,0 +1,503 @@ +// MIT License +// +// Copyright (c) 2023 ROCm Developer Tools +// +// Permission is hereby 
granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include +#include + +#include "lib/common/environment.hpp" +#include "lib/common/units.hpp" +#include "lib/common/utility.hpp" +#include "rocprofiler/external_correlation.h" +#include "rocprofiler/fwd.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ROCPROFILER_CALL(ARG, MSG) \ + { \ + auto _status = (ARG); \ + EXPECT_EQ(_status, ROCPROFILER_STATUS_SUCCESS) << MSG << " :: " << #ARG; \ + } + +namespace +{ +struct callback_data +{ + rocprofiler_client_id_t* client_id = nullptr; + rocprofiler_client_finalize_t client_fini_func = nullptr; + rocprofiler_context_id_t client_ctx = {}; + rocprofiler_buffer_id_t client_buffer = {}; + rocprofiler_callback_thread_t client_thread = {}; + uint64_t client_workflow_count = {}; + uint64_t client_callback_count = {}; + int64_t current_depth = 0; + int64_t max_depth = 0; + std::map client_correlation = {}; +}; + +struct agent_data +{ + uint64_t agent_count = 0; + std::vector agents = {}; +}; + +void +tool_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t* user_data, + void* client_data) +{ + static auto mtx = std::mutex{}; + auto lk = std::unique_lock{mtx}; + + auto* cb_data = static_cast(client_data); + auto now = rocprofiler::common::timestamp_ns(); + auto internal_corr_id = record.correlation_id.internal; + auto& external_corr_id = record.correlation_id.external; + static auto first_now = now; + + ASSERT_NE(cb_data, nullptr); + + cb_data->client_callback_count++; + + static auto first = std::once_flag{}; + std::call_once( + first, [record]() { EXPECT_EQ(record.phase, ROCPROFILER_SERVICE_CALLBACK_PHASE_ENTER); }); + + if(record.phase == ROCPROFILER_SERVICE_CALLBACK_PHASE_ENTER) + { + EXPECT_EQ(cb_data->client_correlation.find(internal_corr_id), + cb_data->client_correlation.end()) + << "entry for internal correlation id " << internal_corr_id << " already exists"; + + 
cb_data->client_correlation[internal_corr_id] = external_corr_id; + + user_data->value = now; + auto current_depth = cb_data->current_depth++; + + if(current_depth == 0) + { + uint64_t tid = 0; + ROCPROFILER_CALL(rocprofiler_get_thread_id(&tid), "Failed to get thread id"); + EXPECT_EQ(external_corr_id.value, tid); + } + + ROCPROFILER_CALL(rocprofiler_push_external_correlation_id( + record.context_id, + record.thread_id, + rocprofiler_user_data_t{.value = (internal_corr_id + 1) * 1000}), + "Failed to push new external correlation"); + } + else + { + EXPECT_NE(cb_data->client_correlation.find(internal_corr_id), + cb_data->client_correlation.end()) + << "entry for internal correlation id " << internal_corr_id << " does not exist"; + + EXPECT_EQ(external_corr_id.value, (internal_corr_id + 1) * 1000) + << "external correlation id change was not retained"; + + auto external_corr_data = rocprofiler_user_data_t{}; + ROCPROFILER_CALL(rocprofiler_pop_external_correlation_id( + record.context_id, record.thread_id, &external_corr_data), + "Failed to pop external correlation"); + + EXPECT_EQ(external_corr_data.value, (internal_corr_id + 1) * 1000) + << "external correlation pop did not return current external correlation"; + + EXPECT_GT(user_data->value, 0) << "user data not set"; + EXPECT_GE(user_data->value, first_now) << "timestamp not monotonically increasing"; + EXPECT_LT(user_data->value, now) << "timestamp not monotonically increasing"; + EXPECT_GT(cb_data->current_depth, 0) << "depth should be > 0"; + + cb_data->max_depth = std::max(cb_data->current_depth, cb_data->max_depth); + cb_data->current_depth--; + } +} + +void +tool_tracing_buffered(rocprofiler_context_id_t context, + rocprofiler_buffer_id_t buffer_id, + rocprofiler_record_header_t** headers, + size_t num_headers, + void* buffer_data, + uint64_t drop_count) +{ + std::cout << __FUNCTION__ << "...\n" << std::endl; + auto* cb_data = static_cast(buffer_data); + + auto v_records = std::vector{}; + 
v_records.reserve(num_headers); + + for(size_t i = 0; i < num_headers; ++i) + { + auto* header = headers[i]; + + ASSERT_TRUE(header != nullptr); + auto hash = rocprofiler_record_header_compute_hash(header->category, header->kind); + EXPECT_EQ(header->hash, hash); + EXPECT_TRUE(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING && + header->kind == ROCPROFILER_SERVICE_BUFFER_TRACING_HSA_API); + + v_records.emplace_back( + static_cast(header->payload)); + } + + std::sort(v_records.begin(), v_records.end(), [](auto lhs, auto rhs) { + return (lhs->start_timestamp == rhs->start_timestamp) + ? (lhs->end_timestamp < rhs->end_timestamp) + : (lhs->start_timestamp < rhs->start_timestamp); + }); + + for(auto* record : v_records) + { + auto info = std::stringstream{}; + info << "tid=" << record->thread_id << ", context=" << context.handle + << ", buffer_id=" << buffer_id.handle << ", cid=" << record->correlation_id.internal + << ", kind=" << record->kind << ", operation=" << record->operation + << ", drop_count=" << drop_count << ", start=" << record->start_timestamp + << ", stop=" << record->end_timestamp; + + static int64_t last_corr_id = -1; + auto corr_id = static_cast(record->correlation_id.internal); + + std::cout << info.str() << "\n" << std::flush; + EXPECT_GE(context.handle, 0) << info.str(); + EXPECT_GT(record->thread_id, 0) << info.str(); + EXPECT_GT(record->kind, 0) << info.str(); + EXPECT_GT(corr_id, last_corr_id) << info.str(); + EXPECT_GT(record->start_timestamp, 0) << info.str(); + EXPECT_GT(record->end_timestamp, 0) << info.str(); + EXPECT_LE(record->start_timestamp, record->end_timestamp) << info.str(); + + cb_data->client_callback_count++; + last_corr_id = corr_id; + } +} + +void +thread_precreate(rocprofiler_internal_thread_library_t /*lib*/, void* tool_data) +{ + auto* cb_data = static_cast(tool_data); + cb_data->client_workflow_count++; +} + +void +thread_postcreate(rocprofiler_internal_thread_library_t /*lib*/, void* tool_data) +{ + auto* cb_data = 
static_cast(tool_data); + cb_data->client_workflow_count++; +} +} // namespace + +TEST(rocprofiler_lib, callback_external_correlation) +{ + using init_func_t = int (*)(rocprofiler_client_finalize_t, void*); + using fini_func_t = void (*)(void*); + + using hsa_iterate_agents_cb_t = hsa_status_t (*)(hsa_agent_t, void*); + + auto cmd_line = rocprofiler::common::read_command_line(getpid()); + ASSERT_FALSE(cmd_line.empty()); + + static init_func_t tool_init = [](rocprofiler_client_finalize_t fini_func, + void* client_data) -> int { + auto* cb_data = static_cast(client_data); + + cb_data->client_workflow_count++; + cb_data->client_fini_func = fini_func; + + ROCPROFILER_CALL(rocprofiler_create_context(&cb_data->client_ctx), + "failed to create context"); + + ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service( + cb_data->client_ctx, + ROCPROFILER_SERVICE_CALLBACK_TRACING_HSA_API, + nullptr, + 0, + tool_tracing_callback, + client_data), + "callback tracing service failed to configure"); + + int valid_ctx = 0; + ROCPROFILER_CALL(rocprofiler_context_is_valid(cb_data->client_ctx, &valid_ctx), + "failure checking context validity"); + + EXPECT_EQ(valid_ctx, 1); + + ROCPROFILER_CALL(rocprofiler_start_context(cb_data->client_ctx), + "rocprofiler context start failed"); + + // no errors + return 0; + }; + + static fini_func_t tool_fini = [](void* client_data) -> void { + auto* cb_data = static_cast(client_data); + ROCPROFILER_CALL(rocprofiler_stop_context(cb_data->client_ctx), + "rocprofiler context stop failed"); + + static_cast(client_data)->client_workflow_count++; + }; + + static auto cb_data = callback_data{}; + + static auto cfg_result = + rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t), + tool_init, + tool_fini, + static_cast(&cb_data)}; + + static rocprofiler_configure_func_t rocp_init = + [](uint32_t version, + const char* runtime_version, + uint32_t prio, + rocprofiler_client_id_t* client_id) -> 
rocprofiler_tool_configure_result_t* { + auto expected_version = ROCPROFILER_VERSION; + EXPECT_EQ(expected_version, version); + EXPECT_EQ(std::string_view{runtime_version}, std::string_view{ROCPROFILER_VERSION_STRING}); + EXPECT_EQ(prio, 0); + EXPECT_EQ(client_id->name, nullptr); + cb_data.client_id = client_id; + cb_data.client_id->name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); + return &cfg_result; + }; + + EXPECT_EQ(rocprofiler_force_configure(rocp_init), ROCPROFILER_STATUS_SUCCESS); + + uint64_t tid = 0; + ROCPROFILER_CALL(rocprofiler_get_thread_id(&tid), "failed to get thread id"); + + ROCPROFILER_CALL(rocprofiler_push_external_correlation_id( + cb_data.client_ctx, tid, rocprofiler_user_data_t{.value = tid}), + "failed to push correlation id"); + + hsa_iterate_agents_cb_t agent_cb = [](hsa_agent_t agent, void* data) { + static_cast(data)->agent_count++; + + auto status = HSA_STATUS_SUCCESS; + auto agent_type = hsa_device_type_t{}; + if((status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agent_type)) == + HSA_STATUS_SUCCESS) + static_cast(data)->agents.emplace_back(agent_type); + + return status; + }; + + auto _agent_data = agent_data{}; + uint64_t num_runs = 0; + hsa_init(); + auto run = [&agent_cb, &_agent_data, &num_runs]() { + ++num_runs; + uint64_t _tid = 0; + ROCPROFILER_CALL(rocprofiler_get_thread_id(&_tid), "failed to get thread id"); + ROCPROFILER_CALL(rocprofiler_push_external_correlation_id( + cb_data.client_ctx, _tid, rocprofiler_user_data_t{.value = _tid}), + "failed to push correlation id"); + + hsa_status_t itr_status = hsa_iterate_agents(agent_cb, static_cast(&_agent_data)); + EXPECT_EQ(itr_status, HSA_STATUS_SUCCESS); + + auto user_data = rocprofiler_user_data_t{}; + ROCPROFILER_CALL( + rocprofiler_pop_external_correlation_id(cb_data.client_ctx, _tid, &user_data), + "failed to pop correlation id"); + EXPECT_EQ(user_data.value, _tid) + << "callback modification to external correlation id should not be seen 
here"; + }; + + run(); + std::thread{run}.join(); + std::thread{run}.join(); + + EXPECT_GT(_agent_data.agent_count, 0); + EXPECT_EQ(_agent_data.agent_count, _agent_data.agents.size()); + + ASSERT_NE(cb_data.client_id, nullptr); + ASSERT_NE(cb_data.client_fini_func, nullptr); + + cb_data.client_fini_func(*cb_data.client_id); + + // expected callback count is two for each hsa_iterate_agents and two callbacks for + // hsa_agent_get_info for each agent. + uint64_t expected_cb_count = (2 * num_runs) + (2 * _agent_data.agent_count); + + EXPECT_EQ(cb_data.client_workflow_count, 2); + EXPECT_EQ(cb_data.client_callback_count, expected_cb_count); + EXPECT_EQ(cb_data.client_correlation.size(), expected_cb_count / 2); + EXPECT_EQ(cb_data.current_depth, 0); + EXPECT_EQ(cb_data.max_depth, 2); +} + +TEST(rocprofiler_lib, buffered_external_correlation) +{ + using init_func_t = int (*)(rocprofiler_client_finalize_t, void*); + using fini_func_t = void (*)(void*); + + using hsa_iterate_agents_cb_t = hsa_status_t (*)(hsa_agent_t, void*); + + auto cmd_line = rocprofiler::common::read_command_line(getpid()); + ASSERT_FALSE(cmd_line.empty()); + + static init_func_t tool_init = [](rocprofiler_client_finalize_t fini_func, + void* client_data) -> int { + auto* cb_data = static_cast(client_data); + + cb_data->client_workflow_count++; + cb_data->client_fini_func = fini_func; + + ROCPROFILER_CALL(rocprofiler_create_context(&cb_data->client_ctx), + "failed to create context"); + + ROCPROFILER_CALL(rocprofiler_create_buffer(cb_data->client_ctx, + 4096, + 2048, + ROCPROFILER_BUFFER_POLICY_LOSSLESS, + tool_tracing_buffered, + client_data, + &cb_data->client_buffer), + "buffer creation failed"); + + ROCPROFILER_CALL( + rocprofiler_configure_buffer_tracing_service(cb_data->client_ctx, + ROCPROFILER_SERVICE_BUFFER_TRACING_HSA_API, + nullptr, + 0, + cb_data->client_buffer), + "buffer tracing service failed to configure"); + + 
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&cb_data->client_thread), + "failure creating callback thread"); + + ROCPROFILER_CALL( + rocprofiler_assign_callback_thread(cb_data->client_buffer, cb_data->client_thread), + "failed to assign thread for buffer"); + + int valid_ctx = 0; + ROCPROFILER_CALL(rocprofiler_context_is_valid(cb_data->client_ctx, &valid_ctx), + "failure checking context validity"); + + EXPECT_EQ(valid_ctx, 1); + + ROCPROFILER_CALL(rocprofiler_start_context(cb_data->client_ctx), + "rocprofiler context start failed"); + + // no errors + return 0; + }; + + static fini_func_t tool_fini = [](void* client_data) -> void { + auto* cb_data = static_cast(client_data); + ROCPROFILER_CALL(rocprofiler_stop_context(cb_data->client_ctx), + "rocprofiler context stop failed"); + + static_cast(client_data)->client_workflow_count++; + }; + + static auto cb_data = callback_data{}; + + static auto cfg_result = + rocprofiler_tool_configure_result_t{sizeof(rocprofiler_tool_configure_result_t), + tool_init, + tool_fini, + static_cast(&cb_data)}; + + static rocprofiler_configure_func_t rocp_init = + [](uint32_t version, + const char* runtime_version, + uint32_t prio, + rocprofiler_client_id_t* client_id) -> rocprofiler_tool_configure_result_t* { + auto expected_version = ROCPROFILER_VERSION; + EXPECT_EQ(expected_version, version); + EXPECT_EQ(std::string_view{runtime_version}, std::string_view{ROCPROFILER_VERSION_STRING}); + EXPECT_EQ(prio, 0); + EXPECT_EQ(client_id->name, nullptr); + cb_data.client_id = client_id; + cb_data.client_id->name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); + + ROCPROFILER_CALL(rocprofiler_at_internal_thread_create(thread_precreate, + thread_postcreate, + ROCPROFILER_LIBRARY, + static_cast(&cb_data)), + "failed to register for thread creation notifications"); + + return &cfg_result; + }; + + auto ctx = rocprofiler_context_id_t{}; + EXPECT_NE(rocprofiler_create_context(&ctx), ROCPROFILER_STATUS_SUCCESS); + 
EXPECT_EQ(rocprofiler_force_configure(rocp_init), ROCPROFILER_STATUS_SUCCESS); + EXPECT_NE(rocprofiler_create_context(&ctx), ROCPROFILER_STATUS_SUCCESS); + + hsa_iterate_agents_cb_t agent_cb = [](hsa_agent_t agent, void* data) { + static_cast(data)->agent_count++; + + auto status = HSA_STATUS_SUCCESS; + auto agent_type = hsa_device_type_t{}; + if((status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &agent_type)) == + HSA_STATUS_SUCCESS) + static_cast(data)->agents.emplace_back(agent_type); + + return status; + }; + + auto _agent_data = agent_data{}; + hsa_init(); + hsa_status_t itr_status = hsa_iterate_agents(agent_cb, static_cast(&_agent_data)); + + EXPECT_EQ(itr_status, HSA_STATUS_SUCCESS); + EXPECT_GT(_agent_data.agent_count, 0); + EXPECT_EQ(_agent_data.agent_count, _agent_data.agents.size()); + + ASSERT_NE(cb_data.client_id, nullptr); + ASSERT_NE(cb_data.client_fini_func, nullptr); + + EXPECT_EQ(rocprofiler_flush_buffer(cb_data.client_buffer), ROCPROFILER_STATUS_SUCCESS); + + cb_data.client_fini_func(*cb_data.client_id); + + // expected record count is one buffered record for hsa_iterate_agents and one record for + // hsa_agent_get_info for each agent. 
+ uint64_t expected_cb_count = 1 + _agent_data.agent_count; + // expect the tool init, tool fini, and two calls to thread_precreate and thread_postcreate each + // (the main thread and the assigned thread for the buffer) + uint64_t expected_workflow_count = 6; + + EXPECT_EQ(cb_data.client_workflow_count, expected_workflow_count); + EXPECT_EQ(cb_data.client_callback_count, expected_cb_count); + EXPECT_GT(cb_data.client_thread.handle, 0); + EXPECT_EQ(cb_data.current_depth, 0); + EXPECT_EQ(cb_data.max_depth, 0); +} diff --git a/source/lib/rocprofiler/tests/registration.cpp b/source/lib/rocprofiler/tests/registration.cpp index c8065ab3d7..56e0850191 100644 --- a/source/lib/rocprofiler/tests/registration.cpp +++ b/source/lib/rocprofiler/tests/registration.cpp @@ -69,7 +69,9 @@ struct agent_data }; void -tool_tracing_callback(rocprofiler_callback_tracing_record_t record, void* client_data) +tool_tracing_callback(rocprofiler_callback_tracing_record_t record, + rocprofiler_user_data_t*, + void* client_data) { using name_map_t = std::unordered_map>; @@ -227,13 +229,13 @@ tool_tracing_buffered(rocprofiler_context_id_t context, { auto info = std::stringstream{}; info << "tid=" << record->thread_id << ", context=" << context.handle - << ", buffer_id=" << buffer_id.handle << ", cid=" << record->correlation_id.id + << ", buffer_id=" << buffer_id.handle << ", cid=" << record->correlation_id.internal << ", kind=" << record->kind << ", operation=" << record->operation << ", drop_count=" << drop_count << ", start=" << record->start_timestamp << ", stop=" << record->end_timestamp; static int64_t last_corr_id = -1; - auto corr_id = static_cast(record->correlation_id.id); + auto corr_id = static_cast(record->correlation_id.internal); std::cout << info.str() << "\n" << std::flush; EXPECT_GE(context.handle, 0) << info.str(); diff --git a/source/lib/rocprofiler/tests/status.cpp b/source/lib/rocprofiler/tests/status.cpp new file mode 100644 index 0000000000..8dc595e600 --- /dev/null +++ 
b/source/lib/rocprofiler/tests/status.cpp @@ -0,0 +1,53 @@ +// MIT License +// +// Copyright (c) 2023 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+ +#include +#include + +#include + +#include + +TEST(rocprofiler_lib, error_string) +{ + for(size_t i = 0; i < static_cast(ROCPROFILER_STATUS_LAST); ++i) + { + auto status = static_cast(i); + const auto* name = rocprofiler_get_status_name(status); + const auto* message = rocprofiler_get_status_string(status); + + ASSERT_NE(name, nullptr) << "idx=" << i; + ASSERT_NE(message, nullptr) << name << " (idx=" << i << ")"; + + std::cout << std::setw(60) << name << " :: " << message << "\n"; + + if(i == ROCPROFILER_STATUS_SUCCESS) + { + EXPECT_EQ(std::string_view{message}, std::string_view{"Success"}); + } + else + { + EXPECT_GE(std::string_view{message}.length(), 8) + << "status message for " << name << " (idx=" << i << ") is too short"; + } + } +} diff --git a/source/lib/tests/common/demangling.cpp b/source/lib/tests/common/demangling.cpp index df7aff8cdb..850c0b91b3 100644 --- a/source/lib/tests/common/demangling.cpp +++ b/source/lib/tests/common/demangling.cpp @@ -36,15 +36,15 @@ TEST(common, demangling) using strview_pair_t = std::pair; for(auto [mangled, demangled] : - {strview_pair_t{"_ZN11rocprofiler8internal18correlation_config20get_unique_record_idEv", - "rocprofiler::internal::correlation_config::get_unique_record_id()"}, + {strview_pair_t{"_ZN11rocprofiler8internal18correlation_config22get_unique_internal_idEv", + "rocprofiler::internal::correlation_config::get_unique_internal_id()"}, strview_pair_t{"_ZN11rocprofiler8internal18get_active_configsEv", "rocprofiler::internal::get_active_configs()"}, strview_pair_t{"_ZN11rocprofiler8internal22get_registered_configsEv", "rocprofiler::internal::get_registered_configs()"}, strview_pair_t{ - "_ZZN11rocprofiler8internal18correlation_config20get_unique_record_idEvE2_v", - "rocprofiler::internal::correlation_config::get_unique_record_id()::_v"}, + "_ZZN11rocprofiler8internal18correlation_config22get_unique_internal_idEvE2_v", + "rocprofiler::internal::correlation_config::get_unique_internal_id()::_v"}, 
strview_pair_t{"_ZZN11rocprofiler8internal18get_active_configsEvE2_v", "rocprofiler::internal::get_active_configs()::_v"}, strview_pair_t{"_ZZN11rocprofiler8internal22get_registered_configsEvE2_v",