Files
rocm-systems/source/lib/rocprofiler/hsa/hsa.cpp
T
Jonathan R. Madsen d3eaacd610 Contexts, tracing, include reorg, registration, thread-pool (#65)
* Update scripts/update-doxygen.sh

- ensure build-docs folder exists

* Update scripts/run-ci.py

- exclude files in details subdirectory from code coverage

* Update scripts/thread-sanitizer-suppr.txt

- exclude races in glog

* Update docs/rocprofiler.dox.in

- exclude defines in include/rocprofiler/defines.h from doxygen
- Tweak EXCLUDE_PATTERNS and EXAMPLE_PATTERNS

* Update docs workflow

- trigger workflow whenever there is a change to the public headers (which may be doxygen comments)

* Update include/rocprofiler (reorg and overhaul)

- rocprofiler_status_t additions
  - CONTEXT_NOT_FOUND
  - CONTEXT_ERROR
  - INVALID_CONTEXT_ID
  - INVALID_CONTEXT
  - BUFFER_BUSY
- rocprofiler_context_is_active func
- rocprofiler_context_is_valid func
- rocprofiler_service_callback_tracing_kind_t update
  - remove ROCPROFILER_SERVICE_CALLBACK_TRACING_HELPER_THREAD
- Remove rocprofiler_tracing_helper_thread_operation_t
- Remove rocprofiler_helper_thread_callback_tracer_data_t
- Added rocprofiler_internal_thread_library_t
- Added rocprofiler_at_internal_thread_create
- split rocprofiler.h into several smaller headers
- reworked rocprofiler_status_t values
- added doxygen comments for enums
- replaced rocprofiler_trace_record_operation_kind_t with rocprofiler_trace_operation_t
- use @ instead of / in doxygen comment in rocprofiler_plugin.h
- fix ref to ROCPROFILER_SERVICE_CALLBACK_TRACING_MARKER_API
- end group in fwd.h
- remove PROFILE_COUNTING group in dispatch_profile.h
- remove premature group close in callback_tracing.h
- hsa.h: remove rocprofiler_hsa_trace_data_t
- fwd.h: remove rocprofiler_tracer_callback_data_t
- rename rocprofiler_correlation_id_t.handle to rocprofiler_correlation_id_t.id (consistency)
- fwd.h: add rocprofiler_callback_tracing_record_t
- callback_tracing.h: update rocprofiler_hsa_api_callback_tracer_data_t
- callback_tracing.h: add size fields
- simplify rocprofiler_tracer_callback_t
- removed ROCPROFILER_NONNULL from rocprofiler_get_version
- added rocprofiler_get_timestamp
- ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED in rocprofiler_status_t
- add ROCPROFILER_STATUS_ERROR_THREAD_NOT_FOUND rocprofiler_status_t
- add rocprofiler_buffer_category_t
- rocprofiler_trace_operation_t -> rocprofiler_tracing_operation_t
- rocprofiler_user_data_t union
- tweak rocprofiler_callback_tracing_record_t
  - make external_correlation_id non-pointer
  - add rocprofiler_user_data_t data field
- tweak rocprofiler_record_header_t
  - instead of single uint64_t kind field, have union for category + kind (two u32) with u64 hash
- API extensions for kind id <-> kind string
- API extensions for operation id <-> operation string
- rocprofiler_callback_trace_kind_name_cb_t
- rocprofiler_callback_trace_operation_name_cb_t
- rocprofiler_iterate_callback_trace_kind_names
- rocprofiler_iterate_callback_trace_kind_operation_names
- modify rocprofiler_hsa_api_callback_tracer_data_t data members (remove pointers)
- add rocprofiler_callback_trace_operation_args_cb_t function pointer typedef
- add rocprofiler_iterate_callback_trace_operation_args function
- fixed inconsistent use of *_trace_* vs. *_tracing_* (opting for tracing)
- removed rocprofiler_query_callback_trace_kind_name
- removed rocprofiler_query_callback_kind_operation_name
- Add include/rocprofiler/registration.h
  - header dedicated to registering a tool/client with rocprofiler
  - this header is not intended to be included by rocprofiler.h
  - rocprofiler_client_id_t
    - identifier for client tool
  - rocprofiler_client_finalize_t
    - function pointer prototype for tool-initiated finalization
  - rocprofiler_tool_initialize_t
    - function pointer prototype for tool initialization (i.e. configuration)
  - rocprofiler_tool_finalize_t
    - function pointer prototype for tool finalization
  - rocprofiler_tool_configure_result_t
    - struct returned by tool/client to rocprofiler
  - rocprofiler_is_initialized
    - function for querying whether tool-induced initialization is possible
  - rocprofiler_is_finalized
    - function for querying whether rocprofiler has been finalized
  - rocprofiler_configure prototype
    - this is the function tools implement
    - prototype is always marked as having default visibility
    - no implementation in rocprofiler
  - added typedef for rocprofiler_configure function pointer
  - added rocprofiler_force_configure to explicitly invoke rocprofiler_configure instead of relying on lazy init
- made callback typedef names more consistent (_cb_t suffix)
- typedef for rocprofiler_internal_thread_library_cb_t function pointer
- added rocprofiler_at_internal_thread_create function
- added rocprofiler_callback_thread_t struct
- added rocprofiler_create_callback_thread function
- added rocprofiler_assign_callback_thread function
- removed rocprofiler_buffer_tracing_record_header_t in favor of kind and correlation id in each record type
- added rocprofiler_buffer_tracing_kind_name_cb_t typedef
- added rocprofiler_buffer_tracing_operation_name_cb_t typedef
- added rocprofiler_iterate_buffer_tracing_kind_names function
- added rocprofiler_iterate_buffer_tracing_kind_operation_names function
- removed rocprofiler_query_buffer_trace_kind_name function
- removed rocprofiler_query_buffer_kind_operation_name function

* Update lib/common/container/stable_vector.hpp

- include limits header
- reserve_size struct
- overload stable_vector constructor to support reserving as part of construction

* Update lib/common/container/record_header_buffer.{hpp,cpp}

- add emplace member function accepting category and kind (two u32 variables) instead of one u64 kind
- use std::shared_mutex to prevent data-race when reading m_headers
- record_header_buffer is now multiple writer, single reader
- add read_lock member function (shared)
- add read_unlock member function (shared)
- lock member function gets exclusive lock
- unlock member function releases exclusive lock

* Rename "config" to "context" + restructure + implement

- Restructure config files + license
  - move config files into lib/rocprofiler/config subfolder
  - rename some files
  - add license to some files which were missing it
- Rename config/helpers.hpp
  - rename to allocator.hpp
  - remove get_domain_max_ops
- Create config/domain.{hpp,cpp}
  - structures for handling tracing domains and ops
- Update config/config.{hpp,cpp}
  - buffer_instance struct
  - callback_tracing_service struct
  - buffer_tracing_service struct
  - config struct
  - allocate_{config,buffer} func
  - {validate,start,stop}_config funcs
  - get_registered_configs func
  - get_active_configs func
  - get_buffers func
- Update rocprofiler.cpp
  - Implement rocprofiler_create_context
  - Implement rocprofiler_start_context
  - Implement rocprofiler_stop_context
  - Implement rocprofiler_context_is_active
  - Implement rocprofiler_context_is_valid
  - Implement rocprofiler_flush_buffer
  - Implement rocprofiler_destroy_buffer
  - Implement rocprofiler_create_buffer
- Update lib/rocprofiler/hsa
  - use rocprofiler_tracer_activity_domain_t instead of rocprofiler_tracer_activity_domain_t
  - remove ROCPROFILER_TRACER_ACTIVITY_DOMAIN_HSA_API from HSA_API_INFO_DEFINITION_* macros
- Update lib/rocprofiler/context/domain.*
  - fixes for domain_info (i.e. use correct enums)
  - update rocprofiler_status_t codes
  - fix template instantiations
- Update lib/rocprofiler/context/context.*
  - use rocprofiler_service_callback_tracing_kind_t instead of rocprofiler_tracer_activity_domain_t
  - rename correlation_context to correlation_tracing_service
  - fix domains in callback_tracing_service and buffer_tracing_service
  - unique_ptr for callback_tracer and buffered_tracer in context
- Update lib/rocprofiler/rocprofiler.cpp
  - implement rocprofiler_configure_callback_tracing_service
- Update lib/rocprofiler/hsa/ostream.hpp
  - include rocprofiler.h instead of tracer.hpp
- Update lib/rocprofiler/hsa
  - migration to use rocprofiler_hsa_api_callback_tracer_data_t instead of rocprofiler_hsa_trace_data_t
  - restructure hsa_api_impl<Idx>
    - remove phase_enter and phase_exit
    - add set_data_args (partial replacement for phase_enter)
    - functor handles the contexts
- Update lib/rocprofiler/rocprofiler.cpp
  - implement rocprofiler_get_version
- Update lib/rocprofiler/hsa/hsa.{hpp,cpp}
  - remove hsa_api_ prefix for functions already in hsa namespace
- Update lib/rocprofiler/context/context.{hpp,cpp}
  - add client_idx to context struct (tool identifier)
  - add push_client function to set client_idx before context is allocated
  - add pop_client function to remove client identifier from future context creations
  - implemented {registered,active}_contexts and buffers to use new container::reserve_size overload to stable_vector
  - fix implementation of start_context
  - fix implementation of stop_context
- Update lib/rocprofiler/rocprofiler.cpp
  - prevent context creation, buffer creation, pc sampling config, etc. after initialization
  - add nullptr checks to rocprofiler_context_is_valid
  - fix rocprofiler_configure_callback_tracing_service
    - was checking size of buffers, not registered context
  - implement rocprofiler_iterate_callback_trace_kind_names
  - implement rocprofiler_iterate_callback_trace_kind_operation_names
- Update lib/rocprofiler/CMakeLists.txt
  - add registration.{hpp,cpp} to rocprofiler-library target sources
- Update lib/rocprofiler/hsa/utils.hpp
  - fix using fmt::format with const char* strings
  - remove join functions (no longer used)
- Update lib/rocprofiler/hsa/hsa.{hpp,cpp}
  - remove args_string function
  - remove named_args_string function
  - update iterate_args function
    - change callback type
    - accept user data
  - rework the hsa_api_impl<Idx>::functor function
    - save the rocprofiler_callback_tracing_record_t between callbacks
  - update update_table function
    - check buffered_tracer domains
  - remove comments
- Update lib/rocprofiler/hsa/defines.hpp
  - remove MEMBER_<N> macros
  - add ADDR_MEMBER_<N> macros
  - remove doxygen comments for GET_MEMBER_FIELDS
  - add GET_ADDR_MEMBER_FIELDS
  - update HSA_API_INFO_DEFINITION_{0,V}
    - rename domain_idx to callback_domain_idx
    - add buffered_domain_idx
    - add as_arg_addr function
- Update lib/rocprofiler/rocprofiler.cpp
  - implement rocprofiler_iterate_callback_trace_operation_args
- Remove lib/rocprofiler/tracing.{hpp,cpp} and lib/rocprofiler/CMakeLists.txt
  - unused
- Update lib/rocprofiler/hsa/hsa.{hpp,cpp}
  - support buffered tracing in hsa_api_impl<Idx>::functor
  - rocprofiler_callback_trace_operation_args_cb_t -> rocprofiler_callback_tracing_operation_args_cb_t
    - i.e. trace -> tracing
- Update lib/rocprofiler/context/context.{hpp,cpp}
  - removed buffer_instance struct
  - removed allocate_buffer function
  - removed get_buffers function
  - changed buffer_tracing_service::buffer_array_t
- Update lib/rocprofiler/hsa: hsa.cpp, ostream.hpp, details folder
  - move ostream.hpp into details folder to prevent from contributing to code coverage
  - update cmake build system for new directory

* Add lib/rocprofiler/registration.{hpp,cpp}

- implements rocprofiler_set_api_table (called by rocprofiler-register)
- miscellaneous functions for client configure/initialize/finalize
- functions for querying the init/fini status
- relocated OnLoad HSA workaround to this file
  - at present, this is used to work around ROCr not having rocprofiler-register integration yet
- implement rocprofiler_force_configure function
- implement rocprofiler_is_initialized function
- implement rocprofiler_is_finalized function
- ensure configure functions only invoked once
- ensure internal thread creation notification functions are invoked
- get_status is pair of atomics
- fix heap-use-after-free in init_logging
- update finalize
  - invoke hsa_shut_down
  - set all active contexts to null pointers

* Add lib/rocprofiler/buffer_tracing.cpp

- contains implementations of buffer_tracing (i.e. rocprofiler/buffer_tracing.h)
- previous implementation may have been moved out of lib/rocprofiler/rocprofiler.cpp

* Add lib/rocprofiler/buffer.{hpp,cpp}

- contains implementations of buffer (i.e. rocprofiler/buffer.h) and misc internal access functions
- previous implementation may have been moved out of lib/rocprofiler/rocprofiler.cpp and lib/rocprofiler/context/context.{hpp,cpp}

* Add lib/rocprofiler/callback_tracing.cpp

- contains implementations of callback_tracing (i.e. rocprofiler/callback_tracing.h)
- previous implementation may have been moved out of lib/rocprofiler/rocprofiler.cpp

* Add lib/rocprofiler/context.cpp

- contains implementations of context public API functions (i.e. rocprofiler/context.h)
- previous implementation may have been moved out of lib/rocprofiler/rocprofiler.cpp

* Add lib/rocprofiler/internal_threading.{hpp,cpp}

- contains implementations of internal_threading (i.e. rocprofiler/internal_threading.h)
- also contains implementations of internal access functions
- update finalize function
  - join all task groups and destroy all thread pools first, then reset unique_ptr

* Update lib/rocprofiler/rocprofiler.cpp

- rocprofiler_get_version returns status
- implement rocprofiler_get_timestamp
- remove misc implementations that were split into other files

* Update lib/rocprofiler/CMakeLists.txt

- compile new implementation files
  - buffer.cpp
  - buffer_tracing.cpp
  - callback_tracing.cpp
  - context.cpp
  - internal_threading.cpp

* Update lib/tests/buffering/buffering-*.cpp

- update to reflect changes to rocprofiler_record_header_t

* Update CMakeLists.txt

- increase minimum cmake version to 3.21 which added HIP support as a language

* Add samples/apps/transpose

- simple HIP application for testing

* Add samples/api_callback_tracing

- HIP application and tool library
- This effectively demos how to setup HSA API tracing
  - For each function called in tool, it stores the func/file/line and prints it during finalization
- client.hpp and client.cpp are the tool library
- Implement use of rocprofiler_iterate_callback_trace_operation_args
- add demo of using rocprofiler_get_version
- add_test
  - remove PASS_REGULAR_EXPRESSION
    - causing false passes during memcheck
  - add ROCPROFILER_MEMCHECK_PRELOAD_ENV to environment
- check if rocprofiler is initialized before stopping context

* Add samples/api_buffered_tracing

- Sample demonstrating tracing the HSA API via buffering
- demo rocprofiler_record_header_compute_hash
- throw exceptions for unexpected buffer data
- add_test
  - remove PASS_REGULAR_EXPRESSION
    - causing false passes during memcheck
  - add ROCPROFILER_MEMCHECK_PRELOAD_ENV to environment

* Update samples/CMakeLists.txt

- add subdirectory for api_callback_tracing
- add subdirectory api_buffered_tracing

* Update samples/pc_sampling/common.h

- fix processing of headers

* Update lib/rocprofiler/hsa/details/ostream.hpp

- fix data race on HSA_depth_max_cnt and recursion
- HSA_depth_max_cnt and recursion are now thread-local statics instead of global statics
- replace std::string usage with std::string_view

* Actions update

- add dependabot.yml
- use actions/checkout@v4
- install latest libasan and libtsan in sanitizer containers

* Add PTL (Parallel Tasking Library) submodule
2023-09-20 19:32:02 -05:00

531 lines
18 KiB
C++

// Copyright (c) 2018-2023 Advanced Micro Devices, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "lib/rocprofiler/hsa/hsa.hpp"
#include "lib/common/defines.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler/buffer.hpp"
#include "lib/rocprofiler/context/context.hpp"
#include "lib/rocprofiler/hsa/details/ostream.hpp"
#include "lib/rocprofiler/hsa/types.hpp"
#include "lib/rocprofiler/hsa/utils.hpp"
#include <rocprofiler/buffer.h>
#include <rocprofiler/callback_tracing.h>
#include <rocprofiler/fwd.h>
#include <glog/logging.h>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include <utility>
namespace rocprofiler
{
namespace hsa
{
namespace
{
// File-local activity functor. It is value-initialized here and only written by
// set_callback() further down in this file.
std::atomic<activity_functor_t> report_activity = {};
// Empty tag type. exec() returns it for HSA functions whose return type is void so that
// generic code (functor, set_data_retval) always has a result object to work with.
struct null_type
{};
// Copies the return value of an HSA API call into the member of `_data` that matches the
// value's type.
//
//   _data  object exposing the `<type>_retval` members written below
//   _val   value returned by exec(); a `null_type` means the wrapped function returned void
//          and nothing is stored
//
// Any type not listed here is rejected at compile time.
template <typename DataT, typename Tp>
void
set_data_retval(DataT& _data, Tp _val)
{
    if constexpr(std::is_same_v<Tp, hsa_status_t>)
        _data.hsa_status_t_retval = _val;
    else if constexpr(std::is_same_v<Tp, uint32_t>)
        _data.uint32_t_retval = _val;
    else if constexpr(std::is_same_v<Tp, uint64_t>)
        _data.uint64_t_retval = _val;
    else if constexpr(std::is_same_v<Tp, hsa_signal_value_t>)
        _data.hsa_signal_value_t_retval = _val;
    else if constexpr(std::is_same_v<Tp, null_type>)
    {
        // void function: there is no value to record
        (void) _data;
        (void) _val;
    }
    else
    {
        // Tp is a by-value parameter and therefore never void, so this assertion fires for
        // every type that reaches this branch
        static_assert(std::is_void_v<Tp>, "Error! unsupported return type");
    }
}
} // namespace
// Returns a reference to a function-local static HSA API table.
//
// The four sub-tables (core, AMD extension, image extension, finalizer extension) are also
// function-local statics. On the first call each sub-table has its version field filled in
// and the top-level table is assembled from pointers to them; later calls return the same
// object.
hsa_api_table_t&
get_table()
{
    static auto _core     = CoreApiTable{};
    static auto _amd_ext  = AmdExtTable{};
    static auto _img_ext  = ImageExtTable{};
    static auto _fini_ext = FinalizerExtTable{};

    // immediately-invoked lambda so the set-up runs exactly once, as part of the
    // initialization of the static below
    static auto _table = []() -> hsa_api_table_t {
        _core.version = {
            HSA_CORE_API_TABLE_MAJOR_VERSION, sizeof(_core), HSA_CORE_API_TABLE_STEP_VERSION, 0};

        _amd_ext.version = {HSA_AMD_EXT_API_TABLE_MAJOR_VERSION,
                            sizeof(_amd_ext),
                            HSA_AMD_EXT_API_TABLE_STEP_VERSION,
                            0};

        _img_ext.version = {HSA_IMAGE_API_TABLE_MAJOR_VERSION,
                            sizeof(_img_ext),
                            HSA_IMAGE_API_TABLE_STEP_VERSION,
                            0};

        _fini_ext.version = {HSA_FINALIZER_API_TABLE_MAJOR_VERSION,
                             sizeof(_fini_ext),
                             HSA_FINALIZER_API_TABLE_STEP_VERSION,
                             0};

        // note the member order: the finalizer table precedes the image table
        return hsa_api_table_t{ApiTableVersion{HSA_API_TABLE_MAJOR_VERSION,
                                               sizeof(HsaApiTable),
                                               HSA_API_TABLE_STEP_VERSION,
                                               0},
                               &_core,
                               &_amd_ext,
                               &_fini_ext,
                               &_img_ext};
    }();

    return _table;
}
// Stores the arguments of the HSA API call `Idx` in `_data_args`.
//
//   _data_args  argument record for this operation (part of the callback tracer data)
//   args        the arguments of the intercepted call, taken by value
//
// For every operation except hsa_amd_memory_async_copy_rect the record is brace-initialized
// from the arguments. The copy-rect record carries an additional `range__val` member holding
// a copy of the structure `range` points to, so that record is filled in member by member.
template <size_t Idx>
template <typename DataArgsT, typename... Args>
auto
hsa_api_impl<Idx>::set_data_args(DataArgsT& _data_args, Args... args)
{
    if constexpr(Idx == ROCPROFILER_HSA_API_ID_hsa_amd_memory_async_copy_rect)
    {
        auto _tuple                  = std::make_tuple(args...);
        _data_args.dst               = std::get<0>(_tuple);
        _data_args.dst_offset        = std::get<1>(_tuple);
        _data_args.src               = std::get<2>(_tuple);
        _data_args.src_offset        = std::get<3>(_tuple);
        _data_args.range             = std::get<4>(_tuple);
        _data_args.copy_agent        = std::get<5>(_tuple);
        _data_args.dir               = std::get<6>(_tuple);
        _data_args.num_dep_signals   = std::get<7>(_tuple);
        _data_args.dep_signals       = std::get<8>(_tuple);
        _data_args.completion_signal = std::get<9>(_tuple);

        // `range` comes straight from the application. Only dereference it when it is
        // non-null so that an invalid call is left for the wrapped function to handle
        // instead of crashing here while recording the arguments.
        if(auto _range = std::get<4>(_tuple))
            _data_args.range__val = *_range;
        else
            _data_args.range__val = {};
    }
    else
    {
        _data_args = DataArgsT{args...};
    }
}
// Invokes `_func` with the forwarded arguments and normalizes the result.
//
//   _func  callable to invoke; it is tested for truthiness first
//   args   arguments forwarded to `_func`
//
// Returns
//   - null_type{}                    when `_func` returns void (whether or not it was called)
//   - the value returned by `_func`  when `_func` is non-null and returns a value
//   - return_type{HSA_STATUS_ERROR}  when `_func` is null and returns a value
//
// Only void, enum and integral return types are accepted.
template <size_t Idx>
template <typename FuncT, typename... Args>
auto
hsa_api_impl<Idx>::exec(FuncT&& _func, Args&&... args)
{
    using return_type = std::decay_t<std::invoke_result_t<FuncT, Args...>>;

    constexpr bool _returns_void = std::is_void<return_type>::value;

    static_assert(std::is_void<return_type>::value || std::is_enum<return_type>::value ||
                      std::is_integral<return_type>::value,
                  "Error! unsupported return type");

    // nothing to call: produce the fallback result for this return type
    if(!_func)
    {
        if constexpr(_returns_void)
            return null_type{};
        else
            return return_type{HSA_STATUS_ERROR};
    }

    if constexpr(_returns_void)
    {
        _func(std::forward<Args>(args)...);
        return null_type{};
    }
    else
    {
        return _func(std::forward<Args>(args)...);
    }
}
// Tracing wrapper around the HSA API function identified by `Idx`.
//
// Steps:
//   1. collect the active contexts whose callback tracer and/or buffered tracer enable this
//      domain + operation
//   2. if there are none, call the original function and return
//   3. invoke the ENTER-phase callbacks
//   4. call the original function via exec(), bracketed by timestamps when buffered tracing
//      is in use
//   5. invoke the EXIT-phase callbacks, with the return value stored in the tracer data
//   6. emplace the buffered record into the buffer registered for each buffered context
//
//   args  arguments of the intercepted call
//
// Returns the value produced by the original function, or HSA_STATUS_SUCCESS when the
// original function returns void.
template <size_t Idx>
template <typename... Args>
auto
hsa_api_impl<Idx>::functor(Args&&... args)
{
    using info_type = hsa_api_info<Idx>;

    LOG(INFO) << __PRETTY_FUNCTION__;

    // the record is stored per context so the EXIT callback receives the same record (with
    // only phase/payload updated) that was handed to the ENTER callback
    struct callback_context_data
    {
        context::context*                     ctx    = nullptr;
        rocprofiler_callback_tracing_record_t record = {};
    };

    struct buffered_context_data
    {
        context::context* ctx = nullptr;
    };

    auto callback_contexts = std::vector<callback_context_data>{};
    auto buffered_contexts = std::vector<buffered_context_data>{};

    for(const auto& aitr : context::get_active_contexts())
    {
        auto* itr = aitr.load();
        if(!itr) continue;

        // The two services are checked independently of each other: a context whose callback
        // tracer does not enable this domain + op may still have a buffered tracer that does,
        // so a negative callback check must not skip the buffered check.
        if(itr->callback_tracer &&
           itr->callback_tracer->domains(info_type::callback_domain_idx,
                                         info_type::operation_idx))
        {
            callback_contexts.emplace_back(
                callback_context_data{itr, rocprofiler_callback_tracing_record_t{}});
        }

        if(itr->buffered_tracer &&
           itr->buffered_tracer->domains(info_type::buffered_domain_idx,
                                         info_type::operation_idx))
        {
            buffered_contexts.emplace_back(buffered_context_data{itr});
        }
    }

    // fast path: nobody is tracing this operation
    if(callback_contexts.empty() && buffered_contexts.empty())
    {
        auto _ret = exec(info_type::get_table_func(), std::forward<Args>(args)...);
        if constexpr(!std::is_same<decltype(_ret), null_type>::value)
            return _ret;
        else
            return HSA_STATUS_SUCCESS;
    }

    auto buffer_record = rocprofiler_buffer_tracing_hsa_api_record_t{};
    auto tracer_data   = rocprofiler_hsa_api_callback_tracer_data_t{};
    auto corr_id       = context::correlation_tracing_service::get_unique_record_id();
    auto thr_id        = common::get_tid();

    // construct the buffered info before the callback so the callbacks are as closely wrapped
    // around the function call as possible
    if(!buffered_contexts.empty())
    {
        buffer_record.kind           = info_type::buffered_domain_idx;
        buffer_record.correlation_id = rocprofiler_correlation_id_t{corr_id};
        buffer_record.operation      = info_type::operation_idx;
        buffer_record.thread_id      = thr_id;
    }

    // invoke the callbacks
    if(!callback_contexts.empty())
    {
        tracer_data.size = sizeof(rocprofiler_hsa_api_callback_tracer_data_t);

        // set_data_args takes its arguments by value: pass the pack as lvalues (copy) so the
        // same pack can still be forwarded to the real function call below
        set_data_args(info_type::get_api_data_args(tracer_data.args), args...);

        for(auto& itr : callback_contexts)
        {
            auto& ctx    = itr.ctx;
            auto& record = itr.record;

            uint64_t extern_corr_id = 0;
            auto&    _correlation   = ctx->correlation_tracer;
            if(_correlation.external_id_callback)
            {
                // NOTE(review): this writes to state stored in the context from whichever
                // thread made the API call -- confirm this is safe under concurrent tracing
                _correlation.external_id = _correlation.external_id_callback(
                    info_type::callback_domain_idx, info_type::operation_idx, corr_id);
                extern_corr_id = _correlation.external_id;
            }

            auto user_data = rocprofiler_user_data_t{.value = 0};
            record         = rocprofiler_callback_tracing_record_t{
                thr_id,
                rocprofiler_correlation_id_t{corr_id},
                rocprofiler_external_correlation_id_t{extern_corr_id},
                info_type::callback_domain_idx,
                info_type::operation_idx,
                ROCPROFILER_SERVICE_CALLBACK_PHASE_ENTER,
                user_data,
                static_cast<void*>(&tracer_data)};

            auto& callback_info =
                ctx->callback_tracer->callback_data.at(info_type::callback_domain_idx);
            callback_info.callback(record, callback_info.data);
        }
    }

    // record the start timestamp as close to the function call as possible
    if(!buffered_contexts.empty())
    {
        buffer_record.start_timestamp = common::timestamp_ns();
    }

    auto _ret = exec(info_type::get_table_func(), std::forward<Args>(args)...);

    // record the end timestamp as close to the function call as possible
    if(!buffered_contexts.empty())
    {
        buffer_record.end_timestamp = common::timestamp_ns();
    }

    if(!callback_contexts.empty())
    {
        // make the return value visible to the EXIT-phase callbacks
        set_data_retval(tracer_data.retval, _ret);

        for(auto& itr : callback_contexts)
        {
            auto& ctx    = itr.ctx;
            auto& record = itr.record;

            record.phase   = ROCPROFILER_SERVICE_CALLBACK_PHASE_EXIT;
            record.payload = static_cast<void*>(&tracer_data);

            auto& callback_info =
                ctx->callback_tracer->callback_data.at(info_type::callback_domain_idx);
            callback_info.callback(record, callback_info.data);
        }
    }

    if(!buffered_contexts.empty())
    {
        for(auto& itr : buffered_contexts)
        {
            assert(itr.ctx->buffered_tracer);
            auto buffer_id =
                itr.ctx->buffered_tracer->buffer_data.at(info_type::buffered_domain_idx);

            // find the buffer registered for this context + domain and store the record in
            // it; at most one buffer receives the record
            for(auto& bitr : buffer::get_buffers())
            {
                if(bitr && bitr->context_id == itr.ctx->context_idx &&
                   bitr->buffer_id == buffer_id.handle)
                {
                    bitr->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING,
                                  info_type::buffered_domain_idx,
                                  buffer_record);
                    break;
                }
            }
        }
    }

    if constexpr(!std::is_same<decltype(_ret), null_type>::value)
        return _ret;
    else
        return HSA_STATUS_SUCCESS;
}
} // namespace hsa
} // namespace rocprofiler
// template specializations
#include "hsa.def.cpp"
namespace rocprofiler
{
namespace hsa
{
namespace
{
// Looks up the name of the operation whose index equals `id` by walking the index sequence
// one element per instantiation.
//
// Returns hsa_api_info<Idx>::name for the first matching index, or nullptr when no index in
// the sequence matches.
template <size_t Idx, size_t... IdxTail>
const char*
name_by_id(const uint32_t id, std::index_sequence<Idx, IdxTail...>)
{
    if constexpr(sizeof...(IdxTail) == 0)
    {
        // last element of the sequence: either it matches or nothing does
        return (Idx == id) ? hsa_api_info<Idx>::name : nullptr;
    }
    else
    {
        return (Idx == id) ? hsa_api_info<Idx>::name
                           : name_by_id(id, std::index_sequence<IdxTail...>{});
    }
}
template <size_t Idx, size_t... IdxTail>
uint32_t
id_by_name(const char* name, std::index_sequence<Idx, IdxTail...>)
{
if(std::string_view{hsa_api_info<Idx>::name} == std::string_view{name})
return hsa_api_info<Idx>::operation_idx;
if constexpr(sizeof...(IdxTail) > 0)
return id_by_name(name, std::index_sequence<IdxTail...>{});
else
return ROCPROFILER_HSA_API_ID_NONE;
}
// Invokes `func` once per argument of the operation `id`, using the argument values stored
// in `data`.
//
//   id         operation id to look up in the index sequence
//   data       tracer data holding the recorded arguments
//   func       callback receiving (kind, operation, arg number, arg name, arg value string,
//              arg value address, user_data); a non-zero return value stops the iteration
//   user_data  opaque pointer passed through to `func`
//
// The index sequence is walked one element per instantiation. Once the matching operation
// has been processed the function returns immediately, since no other index can match.
template <size_t Idx, size_t... IdxTail>
void
iterate_args(const uint32_t                                    id,
             const rocprofiler_hsa_api_callback_tracer_data_t& data,
             rocprofiler_callback_tracing_operation_args_cb_t  func,
             void*                                             user_data,
             std::index_sequence<Idx, IdxTail...>)
{
    if(Idx == id)
    {
        using info_type = hsa_api_info<Idx>;

        auto&& arg_list = info_type::as_arg_list(data);
        auto&& arg_addr = info_type::as_arg_addr(data);

        // both containers are local to this call, so the bound is computed once
        const size_t num_args = std::min<size_t>(arg_list.size(), arg_addr.size());
        for(size_t i = 0; i < num_args; ++i)
        {
            auto ret = func(info_type::callback_domain_idx,    // kind
                            id,                                // operation
                            i,                                 // arg_number
                            arg_list.at(i).first.c_str(),      // arg_name
                            arg_list.at(i).second.c_str(),     // arg_value_str
                            arg_addr.at(i),                    // arg_value_addr
                            user_data);
            if(ret != 0) break;
        }

        // the operation was found: there is nothing left to search for
        return;
    }

    if constexpr(sizeof...(IdxTail) > 0)
        iterate_args(id, data, func, user_data, std::index_sequence<IdxTail...>{});
}
// Appends hsa_api_info<Idx>::operation_idx to `_id_list` for every index in the sequence,
// skipping values that are not below ROCPROFILER_HSA_API_ID_LAST.
template <size_t... Idx>
void
get_ids(std::vector<uint32_t>& _id_list, std::index_sequence<Idx...>)
{
    const auto _append_if_valid = [&_id_list](const uint32_t _op) {
        if(_op >= ROCPROFILER_HSA_API_ID_LAST) return;
        _id_list.emplace_back(_op);
    };

    // one call per index, in sequence order
    (_append_if_valid(hsa_api_info<Idx>::operation_idx), ...);
}
// Appends hsa_api_info<Idx>::name to `_name_list` for every index in the sequence, skipping
// names that are null or empty.
template <size_t... Idx>
void
get_names(std::vector<const char*>& _name_list, std::index_sequence<Idx...>)
{
    const auto _append_if_named = [&_name_list](const char* _name) {
        const bool _is_blank = (_name == nullptr) || (*_name == '\0');
        if(!_is_blank) _name_list.emplace_back(_name);
    };

    // one call per index, in sequence order
    (_append_if_named(hsa_api_info<Idx>::name), ...);
}
// Replaces function pointers in the HSA API table `_orig` with the rocprofiler functor for
// every operation in the index sequence that at least one registered context traces.
//
//   _orig  API table to modify in place
//
// An operation is wrapped when any registered context enables its domain and operation in
// either the callback tracer or the buffered tracer. Operations nobody traces keep their
// original function pointer.
template <size_t... Idx>
void
update_table(hsa_api_table_t* _orig, std::index_sequence<Idx...>)
{
    // static so the capture-less `_update` lambda below can call it without a capture
    static auto _should_wrap_functor =
        [](auto _callback_domain, auto _buffered_domain, auto _operation) {
            for(const auto& itr : context::get_registered_contexts())
            {
                if(!itr) continue;

                // The two services are checked independently of each other: a context whose
                // callback tracer does not enable the domain may still have a buffered
                // tracer that does, so a negative callback check must not skip the
                // buffered check.
                if(itr->callback_tracer && itr->callback_tracer->domains(_callback_domain) &&
                   itr->callback_tracer->domains(_callback_domain, _operation))
                    return true;

                if(itr->buffered_tracer && itr->buffered_tracer->domains(_buffered_domain) &&
                   itr->buffered_tracer->domains(_buffered_domain, _operation))
                    return true;
            }
            return false;
        };
    // referenced explicitly so it counts as used even when the index pack is empty
    (void) _should_wrap_functor;

    auto _update = [](hsa_api_table_t* _orig_v, auto _info) {
        // check to see if there are any contexts which enable this operation in the HSA API
        // domain
        if(!_should_wrap_functor(
               _info.callback_domain_idx, _info.buffered_domain_idx, _info.operation_idx))
            return;

        // 1. get the sub-table containing the function pointer
        // 2. get reference to function pointer in sub-table
        // 3. update function pointer with functor
        auto& _table = _info.get_table(_orig_v);
        auto& _func  = _info.get_table_func(_table);
        _func        = _info.get_functor(_func);
    };

    (_update(_orig, hsa_api_info<Idx>{}), ...);
}
} // namespace
// Returns the name of the HSA API operation `id`, or nullptr when `id` does not match any
// index below ROCPROFILER_HSA_API_ID_LAST.
//
// The original author notes that the generated assembly for this lookup is a switch
// statement.
const char*
name_by_id(uint32_t id)
{
    constexpr auto _all_ids = std::make_index_sequence<ROCPROFILER_HSA_API_ID_LAST>{};
    return name_by_id(id, _all_ids);
}
uint32_t
id_by_name(const char* name)
{
return id_by_name(name, std::make_index_sequence<ROCPROFILER_HSA_API_ID_LAST>{});
}
// Invokes `callback` for each argument of the HSA API operation `id` using the values stored
// in `data`. Does nothing when `callback` is null.
//
//   id         operation whose arguments are iterated
//   data       tracer data holding the recorded arguments
//   callback   function invoked per argument
//   user_data  opaque pointer passed through to `callback`
void
iterate_args(uint32_t                                          id,
             const rocprofiler_hsa_api_callback_tracer_data_t& data,
             rocprofiler_callback_tracing_operation_args_cb_t  callback,
             void*                                             user_data)
{
    if(!callback) return;

    constexpr auto _all_ids = std::make_index_sequence<ROCPROFILER_HSA_API_ID_LAST>{};
    iterate_args(id, data, callback, user_data, _all_ids);
}
std::vector<uint32_t>
get_ids()
{
auto _data = std::vector<uint32_t>{};
_data.reserve(ROCPROFILER_HSA_API_ID_LAST);
get_ids(_data, std::make_index_sequence<ROCPROFILER_HSA_API_ID_LAST>{});
return _data;
}
std::vector<const char*>
get_names()
{
auto _data = std::vector<const char*>{};
_data.reserve(ROCPROFILER_HSA_API_ID_LAST);
get_names(_data, std::make_index_sequence<ROCPROFILER_HSA_API_ID_LAST>{});
return _data;
}
// Installs `_func` as the activity functor.
//
// The current value is read and then swapped with a compare-exchange against that value.
// If the stored functor changes between the read and the exchange, the exchange fails and
// the failure is ignored, so `_func` is not installed in that case.
void
set_callback(activity_functor_t _func)
{
    auto _current = report_activity.load();
    report_activity.compare_exchange_strong(_current, _func);
}
// Wraps the traced HSA API functions in the table `_orig`. Does nothing when `_orig` is
// null.
void
update_table(hsa_api_table_t* _orig)
{
    if(_orig == nullptr) return;

    update_table(_orig, std::make_index_sequence<ROCPROFILER_HSA_API_ID_LAST>{});
}
} // namespace hsa
} // namespace rocprofiler