[SDK][rocprofv3] Buffer tracing records with args (HIP) (#285)

* [SDK][rocprofv3] HIP API buffer records with args (ext)

- New buffer tracing domain(s) for HIP APIs which include the arguments and the return value in the buffer records
- Update HIP stream support for extended HIP buffer tracing
- Update rocprofv3 tool library and output library to use extended HIP buffer tracing records

* Update stream.cpp

- handle hipStream_t address being reused for a new stream

* Update doxygen docs for rocprofiler_iterate_buffer_tracing_record_args

* Update rocprofv3 tool.cpp

- configure buffer tracing services with HIP_*_API_EXT variants
- tweak logging level for hip_stream_display_callback

* Fix validation tests

- add HIP_RUNTIME_API_EXT and HIP_COMPILER_API_EXT to valid domain names

* Serialization support for buffer tracing args

* Disable stream service for __hipPopCallConfiguration

- this is interpreted as a stream create but it doesn't create a stream

* Fix execute_buffer_record_emplace for HIP extended contexts

* Add uint64_t_retval to rocprofiler_hip_api_retval_t union

- reading in hipError_t_retval during serialization of pointer return value causes undefined behavior

* Fix compilation warning about unused but set parameter

- in hip/stream.cpp

* Add synchronization for async_copy_data

* Fix compilation error

* Fix compilation error

---------

Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
This commit is contained in:
Madsen, Jonathan
2025-03-22 19:57:32 -05:00
کامیت شده توسط GitHub
والد 2d072f9217
کامیت e33dff7ad0
30فایلهای تغییر یافته به همراه548 افزوده شده و 118 حذف شده
@@ -25,6 +25,7 @@
#include <rocprofiler-sdk/agent.h>
#include <rocprofiler-sdk/defines.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/hip/api_args.h>
#include <rocprofiler-sdk/kfd/page_migration_args.h>
#include <stdint.h>
@@ -84,7 +85,30 @@ typedef struct
} rocprofiler_buffer_tracing_hip_api_record_t;
/**
* @brief Additional trace data for OMPT target routines
* @brief ROCProfiler Buffer HIP API Tracer Record.
*/
typedef struct
{
uint64_t size; ///< size of this struct
rocprofiler_buffer_tracing_kind_t kind;
rocprofiler_tracing_operation_t operation;
rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
rocprofiler_thread_id_t thread_id; ///< id for thread generating this record
rocprofiler_hip_api_args_t args; ///< arguments of function call (pointer values are stored
                                 ///< as-is; they may refer to memory that is no longer valid
                                 ///< by the time the record is delivered)
rocprofiler_hip_api_retval_t retval; ///< return value of function call
/// @var kind
/// @brief ::ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT or
/// ::ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT
/// @var operation
/// @brief Specification of the API function, e.g., ::rocprofiler_hip_runtime_api_id_t or
/// ::rocprofiler_hip_compiler_api_id_t
} rocprofiler_buffer_tracing_hip_api_ext_record_t;
/**
* @brief Additional trace data for OpenMP target routines
*/
typedef struct rocprofiler_buffer_tracing_ompt_target_t
@@ -494,6 +518,53 @@ rocprofiler_iterate_buffer_tracing_kind_operations(
rocprofiler_buffer_tracing_kind_operation_cb_t callback,
void* data) ROCPROFILER_API ROCPROFILER_NONNULL(2);
/**
* @brief Callback function for iterating over the function arguments to a traced function.
* This function will be invoked for each argument.
* @see rocprofiler_iterate_buffer_tracing_record_args
*
* @param [in] kind domain
* @param [in] operation associated domain operation
* @param [in] arg_number the argument number, starting at zero
* @param [in] arg_value_addr the address of the argument stored by rocprofiler.
* @param [in] arg_indirection_count the total number of indirection levels for the argument, e.g.
* int == 0, int* == 1, int** == 2
* @param [in] arg_type the typeid name of the argument (not demangled)
* @param [in] arg_name the name of the argument in the prototype (or rocprofiler union)
* @param [in] arg_value_str conversion of the argument to a string, e.g. operator<< overload
* @param [in] data user data
* @return int Return zero to continue with the next argument. For HIP API records, a non-zero
* value stops the iteration over the remaining arguments.
*/
typedef int (*rocprofiler_buffer_tracing_operation_args_cb_t)(
rocprofiler_buffer_tracing_kind_t kind,
rocprofiler_tracing_operation_t operation,
uint32_t arg_number,
const void* const arg_value_addr,
int32_t arg_indirection_count,
const char* arg_type,
const char* arg_name,
const char* arg_value_str,
void* data);
/**
* @brief Iterates over all the arguments for the traced function (when available). This is
* particularly useful when tools want to annotate traces with the function arguments. See
* @example samples/api_buffer_tracing/client.cpp for a usage example.
*
* In contrast to ::rocprofiler_iterate_callback_tracing_kind_operation_args, this function
* cannot dereference pointer arguments since there is a high probability that the pointer
* address references the stack and the buffer tracing record is delivered after the
* stack variables of the corresponding function have been destroyed.
*
* @param[in] record Buffer record
* @param[in] callback The callback function which will be invoked for each argument
* @param[in] user_data Data to be passed to each invocation of the callback
* @return ::rocprofiler_status_t
* @retval ::ROCPROFILER_STATUS_SUCCESS The record kind is
* ::ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT or
* ::ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT and the arguments were iterated
* @retval ::ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT The record kind is not a valid buffer
* tracing kind (e.g. ::ROCPROFILER_BUFFER_TRACING_NONE or ::ROCPROFILER_BUFFER_TRACING_LAST)
* @retval ::ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED The record kind does not carry function
* arguments
*/
rocprofiler_status_t
rocprofiler_iterate_buffer_tracing_record_args(
rocprofiler_record_header_t record,
rocprofiler_buffer_tracing_operation_args_cb_t callback,
void* user_data) ROCPROFILER_API ROCPROFILER_NONNULL(2);
/** @} */
ROCPROFILER_EXTERN_C_FINI
@@ -85,8 +85,69 @@
# define ROCPROFILER_SDK_CEREAL_NAMESPACE_END } // namespace cereal
#endif
namespace rocprofiler
{
namespace sdk
{
namespace serialization
{
/// Stringified description of a single function argument taken from a buffer tracing record.
struct buffer_tracing_args
{
    std::string type{};   ///< typeid name of the argument as reported by the args callback
    std::string name{};   ///< name of the argument as reported by the args callback
    std::string value{};  ///< string conversion of the argument value
};
template <typename Tp>
auto
get_buffer_tracing_args(Tp& data)
{
auto populate_args_array = [](rocprofiler_buffer_tracing_kind_t /*kind*/,
rocprofiler_tracing_operation_t /*operation*/,
uint32_t arg_number,
const void* const /*arg_value_addr*/,
int32_t /*arg_indirection_count*/,
const char* arg_type,
const char* arg_name,
const char* arg_value_str,
void* cb_data) -> int {
if(!cb_data) return 1;
auto* vec = static_cast<std::vector<buffer_tracing_args>*>(cb_data);
auto sz = std::max<size_t>(arg_number + 1, vec->size());
vec->resize(sz, buffer_tracing_args{});
vec->at(arg_number) = buffer_tracing_args{arg_type, arg_name, arg_value_str};
return 0;
};
auto ret = std::vector<buffer_tracing_args>{};
auto record = rocprofiler_record_header_t{};
record.hash =
rocprofiler_record_header_compute_hash(ROCPROFILER_BUFFER_CATEGORY_TRACING, data.kind);
record.payload = &data;
rocprofiler_iterate_buffer_tracing_record_args(record, populate_args_array, &ret);
return ret;
}
} // namespace serialization
} // namespace sdk
} // namespace rocprofiler
ROCPROFILER_SDK_CEREAL_NAMESPACE_BEGIN
namespace sdk = ::rocprofiler::sdk;
// Serializes one stringified function argument: the "type", "name", and "value" members of
// buffer_tracing_args are written to the archive in that order.
// NOTE(review): ROCP_SDK_SAVE_DATA_FIELD is defined outside this view; presumably it stores
// data.<field> under a key equal to the field name -- confirm against the macro definition.
template <typename ArchiveT>
void
save(ArchiveT& ar, const sdk::serialization::buffer_tracing_args& data)
{
ROCP_SDK_SAVE_DATA_FIELD(type);
ROCP_SDK_SAVE_DATA_FIELD(name);
ROCP_SDK_SAVE_DATA_FIELD(value);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_context_id_t data)
@@ -316,7 +377,7 @@ template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_hip_api_retval_t data)
{
ROCP_SDK_SAVE_DATA_FIELD(hipError_t_retval);
ROCP_SDK_SAVE_DATA_FIELD(uint64_t_retval);
}
template <typename ArchiveT>
@@ -513,6 +574,16 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_hip_api_record_t data)
save_buffer_tracing_api_record(ar, data);
}
// Serializes an extended HIP API buffer record: first the fields handled by
// save_buffer_tracing_api_record, then the stringified function arguments under the key
// "args", and finally the return value union.
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_buffer_tracing_hip_api_ext_record_t data)
{
save_buffer_tracing_api_record(ar, data);
// the arguments are converted to {type, name, value} strings via the SDK args iteration
auto args = sdk::serialization::get_buffer_tracing_args(data);
ROCP_SDK_SAVE_VALUE("args", args);
ROCP_SDK_SAVE_DATA_FIELD(retval);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_buffer_tracing_marker_api_record_t data)
@@ -214,7 +214,16 @@ typedef enum // NOLINT(performance-enum-size)
ROCPROFILER_BUFFER_TRACING_ROCDECODE_API, ///< rocDecode tracing
ROCPROFILER_BUFFER_TRACING_ROCJPEG_API, ///< rocJPEG tracing
ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API, ///< Display HIP Stream
ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT,
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT,
ROCPROFILER_BUFFER_TRACING_LAST,
/// @var ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT
/// @brief Similar to ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API except the buffer record
/// contains the function argument(s) and return value
/// @var ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT
/// @brief Similar to ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API except the buffer record
/// contains the function argument(s) and return value
} rocprofiler_buffer_tracing_kind_t;
/**
@@ -35,6 +35,8 @@
#include <hip/amd_detail/amd_hip_gl_interop.h>
#include <hip/amd_detail/hip_api_trace.hpp>
#include <stdint.h>
ROCPROFILER_EXTERN_C_INIT
// Empty struct has a size of 0 in C but size of 1 in C++.
@@ -52,6 +54,7 @@ typedef union rocprofiler_hip_api_retval_t
~rocprofiler_hip_api_retval_t() = default;
#endif
uint64_t uint64_t_retval;
int int_retval;
const char* const_charp_retval;
hipError_t hipError_t_retval;
@@ -138,7 +138,7 @@ buffered_output<Tp, DomainT>::destroy()
}
using hip_buffered_output_t =
buffered_output<rocprofiler_buffer_tracing_hip_api_record_t, domain_type::HIP>;
buffered_output<rocprofiler_buffer_tracing_hip_api_ext_record_t, domain_type::HIP>;
using hsa_buffered_output_t =
buffered_output<rocprofiler_buffer_tracing_hsa_api_record_t, domain_type::HSA>;
using marker_buffered_output_t =
@@ -316,10 +316,10 @@ generate_csv(const output_config& cfg,
}
void
generate_csv(const output_config& cfg,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& data,
const stats_entry_t& stats)
generate_csv(const output_config& cfg,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>& data,
const stats_entry_t& stats)
{
if(data.empty()) return;
@@ -46,10 +46,10 @@ generate_csv(const output_config& cfg,
const stats_entry_t& stats);
void
generate_csv(const output_config& cfg,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& data,
const stats_entry_t& stats);
generate_csv(const output_config& cfg,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>& data,
const stats_entry_t& stats);
void
generate_csv(const output_config& cfg,
@@ -187,7 +187,7 @@ write_json(json_output& json_ar,
const output_config& /*cfg*/,
const metadata& /*tool_metadata*/,
const domain_stats_vec_t& domain_stats,
generator<rocprofiler_buffer_tracing_hip_api_record_t>&& hip_api_gen,
generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>&& hip_api_gen,
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t> kernel_dispatch_gen,
generator<tool_buffer_tracing_memory_copy_with_stream_record_t> memory_copy_gen,
@@ -85,7 +85,7 @@ write_json(json_output& j
const output_config& cfg,
const metadata& tool_metadata,
const domain_stats_vec_t& domain_stats,
generator<rocprofiler_buffer_tracing_hip_api_record_t>&& hip_api_gen,
generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>&& hip_api_gen,
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t> kernel_dispatch_gen,
generator<tool_buffer_tracing_memory_copy_with_stream_record_t> memory_copy_gen,
@@ -360,7 +360,7 @@ write_otf2(
const metadata& tool_metadata,
uint64_t pid,
const std::vector<agent_info>& agent_data,
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_data,
std::deque<rocprofiler_buffer_tracing_hip_api_ext_record_t>* hip_api_data,
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
std::deque<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>* kernel_dispatch_data,
std::deque<tool_buffer_tracing_memory_copy_with_stream_record_t>* memory_copy_data,
@@ -40,7 +40,7 @@ write_otf2(
const metadata& tool_metadata,
uint64_t pid,
const std::vector<agent_info>& agent_data,
std::deque<rocprofiler_buffer_tracing_hip_api_record_t>* hip_api_data,
std::deque<rocprofiler_buffer_tracing_hip_api_ext_record_t>* hip_api_data,
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
std::deque<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>* kernel_dispatch_data,
std::deque<tool_buffer_tracing_memory_copy_with_stream_record_t>* memory_copy_data,
@@ -68,7 +68,7 @@ write_perfetto(
const output_config& ocfg,
const metadata& tool_metadata,
std::vector<agent_info> agent_data,
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& hip_api_gen,
const generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>& hip_api_gen,
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& kernel_dispatch_gen,
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& memory_copy_gen,
@@ -40,7 +40,7 @@ write_perfetto(
const output_config& cfg,
const metadata& tool_metadata,
std::vector<agent_info> agent_data,
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& hip_api_gen,
const generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>& hip_api_gen,
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& kernel_dispatch_gen,
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& memory_copy_gen,
@@ -83,8 +83,8 @@ generate_stats(const output_config& /*cfg*/,
stats_entry_t
generate_stats(const output_config& /*cfg*/,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& data)
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>& data)
{
auto hip_stats = stats_map_t{};
for(auto ditr : data)
@@ -37,9 +37,9 @@ generate_stats(const output_config& cfg,
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& data);
stats_entry_t
generate_stats(const output_config& cfg,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_hip_api_record_t>& data);
generate_stats(const output_config& cfg,
const metadata& tool_metadata,
const generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>& data);
stats_entry_t
generate_stats(const output_config& cfg,
@@ -541,12 +541,12 @@ hip_stream_display_callback(rocprofiler_callback_tracing_record_t record,
// STREAM_HANDLE_CREATE and DESTROY are no-ops
if(record.operation == ROCPROFILER_HIP_STREAM_CREATE)
{
ROCP_INFO
ROCP_TRACE
<< "Entered hip_stream_display_callback function for ROCPROFILER_HIP_STREAM_CREATE";
}
else if(record.operation == ROCPROFILER_HIP_STREAM_DESTROY)
{
ROCP_INFO
ROCP_TRACE
<< "Entered hip_stream_display_callback function for ROCPROFILER_HIP_STREAM_DESTROY";
}
else if(record.operation == ROCPROFILER_HIP_STREAM_SET)
@@ -554,15 +554,15 @@ hip_stream_display_callback(rocprofiler_callback_tracing_record_t record,
// Push the stream ID onto the stream stack before the underlying HIP function is called
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
{
ROCP_INFO << "Entered hip_stream_display_callback function for "
"ROCPROFILER_HIP_STREAM_SET with ROCPROFILER_CALLBACK_PHASE_ENTER";
ROCP_TRACE << "Entered hip_stream_display_callback function for "
"ROCPROFILER_HIP_STREAM_SET with ROCPROFILER_CALLBACK_PHASE_ENTER";
rocprofiler::tool::stream::push_stream_id(stream_id);
}
// Pop stream ID off of stream stack after underlying HIP function is completed
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
{
ROCP_INFO << "Entered hip_stream_display_callback function for "
"ROCPROFILER_HIP_STREAM_SET with ROCPROFILER_CALLBACK_PHASE_EXIT";
ROCP_TRACE << "Entered hip_stream_display_callback function for "
"ROCPROFILER_HIP_STREAM_SET with ROCPROFILER_CALLBACK_PHASE_EXIT";
rocprofiler::tool::stream::pop_stream_id();
}
}
@@ -948,11 +948,11 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
tool::write_ring_buffer(*record, domain_type::SCRATCH_MEMORY);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API ||
header->kind == ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API)
else if(header->kind == ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT ||
header->kind == ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT)
{
auto* record =
static_cast<rocprofiler_buffer_tracing_hip_api_record_t*>(header->payload);
static_cast<rocprofiler_buffer_tracing_hip_api_ext_record_t*>(header->payload);
tool::write_ring_buffer(*record, domain_type::HIP);
}
@@ -979,10 +979,18 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
}
else
{
ROCP_FATAL << fmt::format(
"unsupported category + kind: {} + {}", header->category, header->kind);
ROCP_CI_LOG(WARNING) << fmt::format(
"unsupported ROCPROFILER_BUFFER_CATEGORY_TRACING kind: {} :: {}",
header->kind,
tool_metadata->get_kind_name(
static_cast<rocprofiler_buffer_tracing_kind_t>(header->kind)));
}
}
else
{
ROCP_CI_LOG(WARNING) << fmt::format(
"unsupported category + kind: {} + {}", header->category, header->kind);
}
}
}
@@ -1682,7 +1690,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
{
ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service(
get_client_ctx(),
ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API,
ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT,
nullptr,
0,
get_buffers().hip_api_trace),
@@ -1693,7 +1701,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
{
ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service(
get_client_ctx(),
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API,
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT,
nullptr,
0,
get_buffers().hip_api_trace),
@@ -39,6 +39,7 @@
#include "lib/rocprofiler-sdk/rocjpeg/rocjpeg.hpp"
#include "lib/rocprofiler-sdk/runtime_initialization.hpp"
#include <rocprofiler-sdk/buffer_tracing.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/hip/table_id.h>
#include <rocprofiler-sdk/hsa/table_id.h>
@@ -99,6 +100,8 @@ ROCPROFILER_BUFFER_TRACING_KIND_STRING(RUNTIME_INITIALIZATION)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(ROCDECODE_API)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(ROCJPEG_API)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(HIP_STREAM_API)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(HIP_RUNTIME_API_EXT)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(HIP_COMPILER_API_EXT)
template <size_t Idx, size_t... Tail>
std::pair<const char*, size_t>
@@ -263,11 +266,13 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_
break;
}
case ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API:
case ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT:
{
val = rocprofiler::hip::name_by_id<ROCPROFILER_HIP_TABLE_ID_Runtime>(operation);
break;
}
case ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API:
case ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT:
{
val = rocprofiler::hip::name_by_id<ROCPROFILER_HIP_TABLE_ID_Compiler>(operation);
break;
@@ -410,11 +415,13 @@ rocprofiler_iterate_buffer_tracing_kind_operations(
break;
}
case ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API:
case ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT:
{
ops = rocprofiler::hip::get_ids<ROCPROFILER_HIP_TABLE_ID_Runtime>();
break;
}
case ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API:
case ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT:
{
ops = rocprofiler::hip::get_ids<ROCPROFILER_HIP_TABLE_ID_Compiler>();
break;
@@ -467,6 +474,54 @@ rocprofiler_iterate_buffer_tracing_kind_operations(
}
return ROCPROFILER_STATUS_SUCCESS;
}
// Invokes `callback` once for every argument stored in an extended HIP API buffer record.
//
// record:    record header; `kind` selects the domain and `payload` must point to a
//            rocprofiler_buffer_tracing_hip_api_ext_record_t for the HIP *_EXT kinds
// callback:  function invoked for each argument (no-op inside the HIP layer when null)
// user_data: opaque pointer forwarded to every callback invocation
//
// Returns:
//   ROCPROFILER_STATUS_SUCCESS                 for the HIP runtime/compiler *_EXT kinds
//   ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT  for the NONE/LAST sentinels or a null payload
//   ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED   for every other kind
rocprofiler_status_t
rocprofiler_iterate_buffer_tracing_record_args(
    rocprofiler_record_header_t                    record,
    rocprofiler_buffer_tracing_operation_args_cb_t callback,
    void*                                          user_data)
{
    switch(record.kind)
    {
        case ROCPROFILER_BUFFER_TRACING_NONE:
        case ROCPROFILER_BUFFER_TRACING_LAST:
        {
            return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
        }
        case ROCPROFILER_BUFFER_TRACING_HSA_CORE_API:
        case ROCPROFILER_BUFFER_TRACING_HSA_AMD_EXT_API:
        case ROCPROFILER_BUFFER_TRACING_HSA_IMAGE_EXT_API:
        case ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API:
        case ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API:
        case ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API:
        case ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API:
        case ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY:
        case ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH:
        case ROCPROFILER_BUFFER_TRACING_MEMORY_COPY:
        case ROCPROFILER_BUFFER_TRACING_RCCL_API:
        {
            return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
        }
        case ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT:
        case ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT:
        {
            auto* _payload =
                static_cast<rocprofiler_buffer_tracing_hip_api_ext_record_t*>(record.payload);

            // a header without a payload cannot be iterated; report it instead of
            // dereferencing a null pointer
            if(_payload == nullptr) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;

            if(record.kind == ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT)
            {
                rocprofiler::hip::iterate_args<ROCPROFILER_HIP_TABLE_ID_Compiler>(
                    _payload->operation, _payload->args, callback, user_data);
            }
            else
            {
                rocprofiler::hip::iterate_args<ROCPROFILER_HIP_TABLE_ID_Runtime>(
                    _payload->operation, _payload->args, callback, user_data);
            }
            return ROCPROFILER_STATUS_SUCCESS;
        }
    }

    // kinds not listed above do not provide argument iteration
    return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
}
}
#undef RETURN_STATUS_ON_FAIL
@@ -548,7 +548,7 @@ rocprofiler_iterate_callback_tracing_kind_operation_args(
{
rocprofiler::hip::iterate_args<ROCPROFILER_HIP_TABLE_ID_Compiler>(
record.operation,
*static_cast<rocprofiler_callback_tracing_hip_api_data_t*>(record.payload),
static_cast<rocprofiler_callback_tracing_hip_api_data_t*>(record.payload)->args,
callback,
max_deref,
user_data);
@@ -558,7 +558,7 @@ rocprofiler_iterate_callback_tracing_kind_operation_args(
{
rocprofiler::hip::iterate_args<ROCPROFILER_HIP_TABLE_ID_Runtime>(
record.operation,
*static_cast<rocprofiler_callback_tracing_hip_api_data_t*>(record.payload),
static_cast<rocprofiler_callback_tracing_hip_api_data_t*>(record.payload)->args,
callback,
max_deref,
user_data);
@@ -135,6 +135,12 @@ struct context
template <typename KindT>
bool is_tracing(KindT _kind) const;
/// Returns true if this context is tracing at least one of the given kinds.
///
/// Each argument is forwarded to is_tracing(). The results are combined with a logical-OR
/// fold, so evaluation stops at the first kind that is being traced and an empty argument
/// list yields false. (A fold over the comma operator would only report the result for the
/// last argument.)
template <typename... Args>
bool is_tracing_one_of(Args... _args) const
{
    return (is_tracing(_args) || ...);
}
};
// set the client index needs to be called before allocate_context()
@@ -93,9 +93,13 @@
return &base_type::functor<RetT, Args...>; \
} \
\
static std::vector<void*> as_arg_addr(callback_data_type) { return std::vector<void*>{}; } \
static std::vector<void*> as_arg_addr(rocprofiler_hip_api_args_t) \
{ \
return std::vector<void*>{}; \
} \
\
static std::vector<common::stringified_argument> as_arg_list(callback_data_type, int32_t) \
static std::vector<common::stringified_argument> as_arg_list(rocprofiler_hip_api_args_t, \
int32_t) \
{ \
return {}; \
} \
@@ -177,17 +181,16 @@
return common::mpl::function_args_t<func_t>{}; \
} \
\
static std::vector<void*> as_arg_addr(callback_data_type trace_data) \
static std::vector<void*> as_arg_addr(rocprofiler_hip_api_args_t args) \
{ \
return std::vector<void*>{ \
GET_ADDR_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)}; \
GET_ADDR_MEMBER_FIELDS(get_api_data_args(args), __VA_ARGS__)}; \
} \
\
static auto as_arg_list(callback_data_type trace_data, int32_t max_deref) \
static auto as_arg_list(rocprofiler_hip_api_args_t args, int32_t max_deref) \
{ \
return utils::stringize( \
max_deref, \
GET_NAMED_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)); \
max_deref, GET_NAMED_MEMBER_FIELDS(get_api_data_args(args), __VA_ARGS__)); \
} \
}; \
} \
@@ -22,11 +22,14 @@
#include "lib/rocprofiler-sdk/hip/hip.hpp"
#include "lib/common/defines.hpp"
#include "lib/common/logging.hpp"
#include "lib/common/mpl.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/buffer.hpp"
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/hip/utils.hpp"
#include "lib/rocprofiler-sdk/registration.hpp"
#include "lib/rocprofiler-sdk/tracing/fwd.hpp"
#include "lib/rocprofiler-sdk/tracing/tracing.hpp"
#include <rocprofiler-sdk/buffer.h>
@@ -185,6 +188,7 @@ hip_api_impl<TableIdx, OpIdx>::functor(Args... args)
using info_type = hip_api_info<TableIdx, OpIdx>;
using callback_api_data_t = typename hip_domain_info<TableIdx>::callback_data_type;
using buffered_api_data_t = typename hip_domain_info<TableIdx>::buffered_data_type;
using buffered_ext_data_t = typename hip_domain_info<TableIdx>::buffered_ext_data_type;
constexpr auto external_corr_id_domain_idx =
hip_domain_info<TableIdx>::external_correlation_id_domain_idx;
@@ -202,6 +206,7 @@ hip_api_impl<TableIdx, OpIdx>::functor(Args... args)
auto thr_id = common::get_tid();
auto callback_contexts = tracing::callback_context_data_vec_t{};
auto buffered_contexts = tracing::buffered_context_data_vec_t{};
auto extended_contexts = tracing::buffered_context_data_vec_t{};
auto external_corr_ids = tracing::external_correlation_id_map_t{};
tracing::populate_contexts(info_type::callback_domain_idx,
@@ -211,7 +216,12 @@ hip_api_impl<TableIdx, OpIdx>::functor(Args... args)
buffered_contexts,
external_corr_ids);
if(callback_contexts.empty() && buffered_contexts.empty())
tracing::populate_contexts(info_type::buffered_ext_domain_idx,
info_type::operation_idx,
extended_contexts,
external_corr_ids);
if(callback_contexts.empty() && buffered_contexts.empty() && extended_contexts.empty())
{
[[maybe_unused]] auto _ret = exec(info_type::get_table_func(), std::forward<Args>(args)...);
if constexpr(!std::is_void<RetT>::value)
@@ -221,6 +231,7 @@ hip_api_impl<TableIdx, OpIdx>::functor(Args... args)
}
auto buffer_record = common::init_public_api_struct(buffered_api_data_t{});
auto extended_record = common::init_public_api_struct(buffered_ext_data_t{});
auto tracer_data = common::init_public_api_struct(callback_api_data_t{});
auto* corr_id = tracing::correlation_service::construct(ref_count);
auto internal_corr_id = corr_id->internal;
@@ -232,12 +243,16 @@ hip_api_impl<TableIdx, OpIdx>::functor(Args... args)
info_type::operation_idx,
internal_corr_id);
// invoke the callbacks
if(!callback_contexts.empty())
// set the arguments
if(!callback_contexts.empty() || !extended_contexts.empty())
{
set_data_args(info_type::get_api_data_args(tracer_data.args),
convert_arg_type(std::forward<Args>(args))...);
}
// invoke the callbacks
if(!callback_contexts.empty())
{
tracing::execute_phase_enter_callbacks(callback_contexts,
thr_id,
internal_corr_id,
@@ -253,7 +268,7 @@ hip_api_impl<TableIdx, OpIdx>::functor(Args... args)
external_corr_ids, thr_id, external_corr_id_domain_idx);
// record the start timestamp as close to the function call as possible
if(!buffered_contexts.empty())
if(!buffered_contexts.empty() || !extended_contexts.empty())
{
buffer_record.start_timestamp = common::timestamp_ns();
}
@@ -264,15 +279,18 @@ hip_api_impl<TableIdx, OpIdx>::functor(Args... args)
auto _ret = exec(info_type::get_table_func(), std::forward<Args>(args)...);
// record the end timestamp as close to the function call as possible
if(!buffered_contexts.empty())
if(!buffered_contexts.empty() || !extended_contexts.empty())
{
buffer_record.end_timestamp = common::timestamp_ns();
}
if(!callback_contexts.empty())
if(!callback_contexts.empty() || !extended_contexts.empty())
{
set_data_retval(tracer_data.retval, _ret);
}
if(!callback_contexts.empty())
{
tracing::execute_phase_exit_callbacks(callback_contexts,
external_corr_ids,
info_type::callback_domain_idx,
@@ -292,6 +310,23 @@ hip_api_impl<TableIdx, OpIdx>::functor(Args... args)
buffer_record);
}
if(!extended_contexts.empty())
{
extended_record.start_timestamp = buffer_record.start_timestamp;
extended_record.end_timestamp = buffer_record.end_timestamp;
extended_record.args = tracer_data.args;
extended_record.retval = tracer_data.retval;
tracing::execute_buffer_record_emplace(extended_contexts,
thr_id,
internal_corr_id,
external_corr_ids,
ancestor_corr_id,
info_type::buffered_ext_domain_idx,
info_type::operation_idx,
extended_record);
}
// decrement the reference count after usage in the callback/buffers
corr_id->sub_ref_count();
@@ -360,13 +395,13 @@ get_names(std::vector<const char*>& _name_list, std::index_sequence<OpIdx, OpIdx
get_names<TableIdx>(_name_list, std::index_sequence<OpIdxTail...>{});
}
template <size_t TableIdx, typename DataT, size_t OpIdx, size_t... OpIdxTail>
template <size_t TableIdx, typename DataT, typename FuncT, size_t OpIdx, size_t... OpIdxTail>
void
iterate_args(const uint32_t id,
const DataT& data,
rocprofiler_callback_tracing_operation_args_cb_t func,
int32_t max_deref,
void* user_data,
iterate_args(const uint32_t id,
const DataT& data,
FuncT func,
int32_t max_deref,
void* user_data,
std::index_sequence<OpIdx, OpIdxTail...>)
{
if(OpIdx == id)
@@ -376,16 +411,42 @@ iterate_args(const uint32_t id,
auto&& arg_addr = info_type::as_arg_addr(data);
for(size_t i = 0; i < std::min(arg_list.size(), arg_addr.size()); ++i)
{
auto ret = func(info_type::callback_domain_idx, // kind
id, // operation
i, // arg_number
arg_addr.at(i), // arg_value_addr
arg_list.at(i).indirection_level, // indirection
arg_list.at(i).type, // arg_type
arg_list.at(i).name, // arg_name
arg_list.at(i).value.c_str(), // arg_value_str
arg_list.at(i).dereference_count, // num deref in str
user_data);
using return_type = typename common::mpl::function_traits<FuncT>::result_type;
auto ret = return_type{};
if constexpr(std::is_same<FuncT,
rocprofiler_callback_tracing_operation_args_cb_t>::value)
{
ret = func(info_type::callback_domain_idx, // kind
id, // operation
i, // arg_number
arg_addr.at(i), // arg_value_addr
arg_list.at(i).indirection_level, // indirection
arg_list.at(i).type, // arg_type
arg_list.at(i).name, // arg_name
arg_list.at(i).value.c_str(), // arg_value_str
arg_list.at(i).dereference_count, // num deref in str
user_data);
}
else if constexpr(std::is_same<FuncT,
rocprofiler_buffer_tracing_operation_args_cb_t>::value)
{
ret = func(info_type::buffered_ext_domain_idx, // kind
id, // operation
i, // arg_number
arg_addr.at(i), // arg_value_addr
arg_list.at(i).indirection_level, // indirection
arg_list.at(i).type, // arg_type
arg_list.at(i).name, // arg_name
arg_list.at(i).value.c_str(), // arg_value_str
user_data);
}
else
{
static_assert(common::mpl::assert_false<FuncT>::value,
"Error! unsupported callback type");
}
if(ret != 0) break;
}
return;
@@ -398,6 +459,7 @@ iterate_args(const uint32_t id,
bool
should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain,
rocprofiler_buffer_tracing_kind_t _buffered_domain,
rocprofiler_buffer_tracing_kind_t _buffered_ext_domain,
int _operation)
{
// we loop over all the *registered* contexts and see if any of them, at any point in time,
@@ -415,6 +477,11 @@ should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain,
if(itr->buffered_tracer && itr->buffered_tracer->domains(_buffered_domain) &&
itr->buffered_tracer->domains(_buffered_domain, _operation))
return true;
// if there is a buffered tracer enabled for the given domain and op, we need to wrap
if(itr->buffered_tracer && itr->buffered_tracer->domains(_buffered_ext_domain) &&
itr->buffered_tracer->domains(_buffered_ext_domain, _operation))
return true;
}
return false;
}
@@ -473,8 +540,10 @@ update_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
if(_info.offset() >= _orig->size) return;
// check to see if there are any contexts which enable this operation in the HIP API domain
if(!should_wrap_functor(
_info.callback_domain_idx, _info.buffered_domain_idx, _info.operation_idx))
if(!should_wrap_functor(_info.callback_domain_idx,
_info.buffered_domain_idx,
_info.buffered_ext_domain_idx,
_info.operation_idx))
return;
ROCP_TRACE << "updating table entry for " << _info.name;
@@ -546,11 +615,11 @@ get_names()
template <size_t TableIdx>
void
iterate_args(uint32_t id,
const rocprofiler_callback_tracing_hip_api_data_t& data,
rocprofiler_callback_tracing_operation_args_cb_t callback,
int32_t max_deref,
void* user_data)
iterate_args(uint32_t id,
const rocprofiler_hip_api_args_t& data,
rocprofiler_callback_tracing_operation_args_cb_t callback,
int32_t max_deref,
void* user_data)
{
if(callback)
iterate_args<TableIdx>(id,
@@ -561,6 +630,22 @@ iterate_args(uint32_t id,
std::make_index_sequence<hip_domain_info<TableIdx>::last>{});
}
template <size_t TableIdx>
void
iterate_args(uint32_t id,
const rocprofiler_hip_api_args_t& data,
rocprofiler_buffer_tracing_operation_args_cb_t callback,
void* user_data)
{
if(callback)
iterate_args<TableIdx>(id,
data,
callback,
0,
user_data,
std::make_index_sequence<hip_domain_info<TableIdx>::last>{});
}
template <typename TableT>
void
copy_table(TableT* _orig, uint64_t _tbl_instance)
@@ -580,8 +665,9 @@ update_table(TableT* _orig)
update_table<TableIdx>(_orig, std::make_index_sequence<hip_domain_info<TableIdx>::last>{});
}
using hip_api_data_t = rocprofiler_callback_tracing_hip_api_data_t;
using hip_api_data_t = rocprofiler_hip_api_args_t;
using hip_op_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t;
using hip_op_args_bf_t = rocprofiler_buffer_tracing_operation_args_cb_t;
#define INSTANTIATE_HIP_TABLE_FUNC(TABLE_TYPE, TABLE_IDX) \
template void copy_table<TABLE_TYPE>(TABLE_TYPE * _tbl, uint64_t _instv); \
@@ -591,7 +677,8 @@ using hip_op_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t;
template std::vector<uint32_t> get_ids<TABLE_IDX>(); \
template std::vector<const char*> get_names<TABLE_IDX>(); \
template void iterate_args<TABLE_IDX>( \
uint32_t, const hip_api_data_t&, hip_op_args_cb_t, int32_t, void*);
uint32_t, const hip_api_data_t&, hip_op_args_cb_t, int32_t, void*); \
template void iterate_args<TABLE_IDX>(uint32_t, const hip_api_data_t&, hip_op_args_bf_t, void*);
INSTANTIATE_HIP_TABLE_FUNC(hip_runtime_api_table_t, ROCPROFILER_HIP_TABLE_ID_Runtime)
INSTANTIATE_HIP_TABLE_FUNC(hip_compiler_api_table_t, ROCPROFILER_HIP_TABLE_ID_Compiler)
@@ -36,20 +36,22 @@ namespace hip
// Common base for the per-table hip_domain_info specializations (the Runtime and
// Compiler specializations derive from this one). It only provides type aliases
// for the data structures associated with HIP API tracing.
template <>
struct hip_domain_info<ROCPROFILER_HIP_TABLE_ID_LAST>
{
using args_type = rocprofiler_hip_api_args_t;
using retval_type = rocprofiler_hip_api_retval_t;
using callback_data_type = rocprofiler_callback_tracing_hip_api_data_t;
using buffered_data_type = rocprofiler_buffer_tracing_hip_api_record_t;
// argument payload of a HIP API call
using args_type = rocprofiler_hip_api_args_t;
// return value of a HIP API call
using retval_type = rocprofiler_hip_api_retval_t;
// payload type used for callback tracing
using callback_data_type = rocprofiler_callback_tracing_hip_api_data_t;
// record type used for buffer tracing
using buffered_data_type = rocprofiler_buffer_tracing_hip_api_record_t;
// record type used for the extended (*_API_EXT) buffer tracing domains
using buffered_ext_data_type = rocprofiler_buffer_tracing_hip_api_ext_record_t;
};
// Domain description for the HIP runtime API table: maps the table onto its
// callback tracing kind, buffer tracing kinds, operation id bounds, and external
// correlation id request kind. Type aliases are inherited from the
// ROCPROFILER_HIP_TABLE_ID_LAST specialization.
template <>
struct hip_domain_info<ROCPROFILER_HIP_TABLE_ID_Runtime>
: hip_domain_info<ROCPROFILER_HIP_TABLE_ID_LAST>
{
static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API;
static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API;
static constexpr auto none = ROCPROFILER_HIP_RUNTIME_API_ID_NONE;
static constexpr auto last = ROCPROFILER_HIP_RUNTIME_API_ID_LAST;
// callback tracing kind for this table
static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API;
// buffer tracing kind for this table
static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API;
// extended buffer tracing kind for this table
static constexpr auto buffered_ext_domain_idx = ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT;
static constexpr auto none = ROCPROFILER_HIP_RUNTIME_API_ID_NONE;
// used as the length of the operation index sequences built for this table
static constexpr auto last = ROCPROFILER_HIP_RUNTIME_API_ID_LAST;
static constexpr auto external_correlation_id_domain_idx =
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HIP_RUNTIME_API;
};
@@ -58,10 +60,11 @@ template <>
// Domain description for the HIP compiler API table: maps the table onto its
// callback tracing kind, buffer tracing kinds, operation id bounds, and external
// correlation id request kind. Type aliases are inherited from the
// ROCPROFILER_HIP_TABLE_ID_LAST specialization.
struct hip_domain_info<ROCPROFILER_HIP_TABLE_ID_Compiler>
: hip_domain_info<ROCPROFILER_HIP_TABLE_ID_LAST>
{
static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API;
static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API;
static constexpr auto none = ROCPROFILER_HIP_COMPILER_API_ID_NONE;
static constexpr auto last = ROCPROFILER_HIP_COMPILER_API_ID_LAST;
// callback tracing kind for this table
static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API;
// buffer tracing kind for this table
static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API;
// extended buffer tracing kind for this table
static constexpr auto buffered_ext_domain_idx = ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT;
static constexpr auto none = ROCPROFILER_HIP_COMPILER_API_ID_NONE;
// used as the length of the operation index sequences built for this table
static constexpr auto last = ROCPROFILER_HIP_COMPILER_API_ID_LAST;
static constexpr auto external_correlation_id_domain_idx =
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HIP_COMPILER_API;
};
@@ -91,11 +91,18 @@ get_ids();
template <size_t TableIdx>
void
iterate_args(uint32_t id,
const rocprofiler_callback_tracing_hip_api_data_t& data,
rocprofiler_callback_tracing_operation_args_cb_t callback,
int32_t max_deref,
void* user_data);
iterate_args(uint32_t id,
const rocprofiler_hip_api_args_t& data,
rocprofiler_callback_tracing_operation_args_cb_t callback,
int32_t max_deref,
void* user_data);
// Iterates over the arguments of HIP operation `id` (from the table selected by
// TableIdx) contained in `data`, reporting them through the buffer-tracing
// argument callback. Unlike the callback-tracing overload, this one takes no
// max_deref parameter. `user_data` is passed through for the callback.
template <size_t TableIdx>
void
iterate_args(uint32_t id,
const rocprofiler_hip_api_args_t& data,
rocprofiler_buffer_tracing_operation_args_cb_t callback,
void* user_data);
template <typename TableT>
void
@@ -34,12 +34,14 @@
#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp"
#include "lib/rocprofiler-sdk/registration.hpp"
#include "lib/rocprofiler-sdk/tracing/tracing.hpp"
#include "rocprofiler-sdk/hip/compiler_api_id.h"
#include <rocprofiler-sdk/buffer.h>
#include <rocprofiler-sdk/callback_tracing.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/hip/runtime_api_id.h>
#include <rocprofiler-sdk/hip/table_id.h>
#include <rocprofiler-sdk/cxx/utility.hpp>
#include <hip/driver_types.h>
#include <hip/hip_runtime_api.h>
@@ -79,16 +81,26 @@ add_stream(hipStream_t stream)
{
return get_stream_map()->wlock(
[](stream_map_t& _data, hipStream_t _stream) {
if(_data.count(_stream) == 0)
static uint64_t idx_offset = 0;
auto idx = _data.size() + idx_offset;
ROCP_INFO << fmt::format(
"hipStream_t={} :: id={}.handle={}{}", static_cast<void*>(_stream), '{', idx, '}');
ROCP_CI_LOG_IF(WARNING, idx == 0 && _stream != nullptr)
<< "null hip stream does not have index 0";
if(!_data.emplace(_stream, rocprofiler_stream_id_t{.handle = idx}).second)
{
auto idx = _data.size();
ROCP_INFO << fmt::format("hipStream_t={} :: id={}.handle={}{}",
static_cast<void*>(_stream),
'{',
idx,
'}');
_data.emplace(_stream, rocprofiler_stream_id_t{.handle = idx});
idx_offset += 1;
auto _existing = _data.at(_stream);
ROCP_INFO << "existing hipStream_t ("
<< sdk::utility::as_hex(static_cast<void*>(_stream))
<< ") reallocated. rocprofiler_stream_id_t{.handle = " << _existing.handle
<< "} -> rocprofiler_stream_id_t{.handle = " << idx << "}";
_data.at(_stream) = rocprofiler_stream_id_t{.handle = idx};
}
return _data.at(_stream);
},
stream);
@@ -353,13 +365,16 @@ enable_stream_stack()
for(const auto& itr : context::get_registered_contexts())
{
if(itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY) ||
itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API) ||
itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API))
if(itr->is_tracing_one_of(ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY,
ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API,
ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API,
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM_API,
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API,
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API,
ROCPROFILER_BUFFER_TRACING_HIP_STREAM_API,
ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT,
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT))
return true;
}
@@ -371,8 +386,9 @@ enable_compiler_stream_stack()
{
for(const auto& itr : context::get_registered_contexts())
{
if(itr->is_tracing(ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API) ||
itr->is_tracing(ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API))
if(itr->is_tracing_one_of(ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API,
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API,
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT))
return true;
}
@@ -399,6 +415,11 @@ update_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
ROCP_TRACE << "updating table entry for " << _info.name;
constexpr auto num_args = function_args_type::size();
constexpr auto is_hip_pop_call_config_func =
std::is_same<decltype(info_type::operation_idx),
rocprofiler_hip_compiler_api_id_t>::value &&
(static_cast<rocprofiler_hip_compiler_api_id_t>(info_type::operation_idx) ==
ROCPROFILER_HIP_COMPILER_API_ID___hipPopCallConfiguration);
if constexpr(common::mpl::is_one_of<hipStream_t, function_args_type>::value)
{
@@ -430,14 +451,17 @@ update_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamDestroy);
if constexpr(is_hip_destroy_func)
{
ROCP_INFO << _info.name << " has been designated as a stream destroy function";
_func = create_destroy_functor<TableIdx, OpIdx>(_func);
}
else
{
ROCP_INFO << _info.name << " has been designated as a stream set function";
_func = create_read_functor<TableIdx, OpIdx>(_func);
}
}
else if constexpr(common::mpl::is_one_of<hipStream_t*, function_args_type>::value)
else if constexpr(common::mpl::is_one_of<hipStream_t*, function_args_type>::value &&
!is_hip_pop_call_config_func)
{
constexpr auto stream_idx =
common::mpl::index_of<hipStream_t*, function_args_type>::value;
@@ -456,6 +480,8 @@ update_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
if(!enable_compiler_stream_stack()) return;
}
ROCP_INFO << _info.name << " has been designated as a stream create function";
// 1. get the sub-table containing the function pointer in original table
// 2. get reference to function pointer in sub-table in original table
// 3. update function pointer with wrapper
@@ -464,6 +490,9 @@ update_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
_func = create_write_functor<TableIdx, OpIdx>(_func);
}
}
// suppress unused-but-set-parameter warning
common::consume_args(_orig);
}
template <size_t TableIdx, typename Tp, size_t OpIdx, size_t... OpIdxTail>
@@ -168,6 +168,11 @@ struct async_copy_data
buffered_data_t get_buffered_record(const context_t* _ctx,
timestamp_t _beg = 0,
timestamp_t _end = 0) const;
auto get_lock() { return std::make_unique<std::unique_lock<std::mutex>>(m_mtx); }
private:
std::mutex m_mtx = {};
};
async_copy_data::callback_data_t
@@ -347,12 +352,22 @@ async_copy_handler(hsa_signal_value_t signal_value, void* arg)
auto ts = common::timestamp_ns();
auto* _data = static_cast<async_copy_data*>(arg);
auto _lk = _data->get_lock();
auto copy_time = hsa_amd_profiling_async_copy_time_t{};
auto copy_time_status = get_amd_ext_table()->hsa_amd_profiling_get_async_copy_time_fn(
_data->rocp_signal, &copy_time);
auto _profile_time = tracing::profiling_time{copy_time_status, copy_time.start, copy_time.end};
// we need to decrement this reference count at the end of the functions
auto* _corr_id = _data->correlation_id;
auto _dtor = common::scope_destructor{[&_lk, &_data, &_corr_id]() {
_lk.reset(); // reset the unique_ptr so the lock is released
delete _data;
if(_corr_id) _corr_id->sub_ref_count();
}};
if(_profile_time.status == HSA_STATUS_SUCCESS)
{
_profile_time = tracing::adjust_profiling_time(
@@ -375,8 +390,6 @@ async_copy_handler(hsa_signal_value_t signal_value, void* arg)
// get the contexts that were active when the signal was created
const auto& tracing_data = _data->tracing_data;
// we need to decrement this reference count at the end of the functions
auto* _corr_id = _data->correlation_id;
if(_profile_time.status == HSA_STATUS_SUCCESS && !tracing_data.empty())
{
@@ -431,9 +444,6 @@ async_copy_handler(hsa_signal_value_t signal_value, void* arg)
}
ROCP_HSA_TABLE_CALL(ERROR, get_core_table()->hsa_signal_destroy_fn(_data->rocp_signal));
delete _data;
if(_corr_id) _corr_id->sub_ref_count();
return false;
}
@@ -619,6 +629,7 @@ async_copy_impl(Args... args)
_data->tracing_data = std::move(tracing_data);
}
auto _lk = _data->get_lock();
auto& tracing_data = _data->tracing_data;
// at this point, we want to install our own signal handler
@@ -23,6 +23,7 @@
#pragma once
#include "lib/common/mpl.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/buffer.hpp"
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/context/correlation_id.hpp"
@@ -99,6 +100,57 @@ populate_contexts(rocprofiler_callback_tracing_kind_t callback_domain_idx,
}
}
template <typename ClearContainersT = std::false_type,
typename DomainIdx,
typename ContextContainerT>
inline void
populate_contexts(DomainIdx domain_idx,
rocprofiler_tracing_operation_t operation_idx,
ContextContainerT& contexts,
external_correlation_id_map_t& extern_corr_ids,
ClearContainersT = ClearContainersT{})
{
if constexpr(ClearContainersT::value)
{
contexts.clear();
extern_corr_ids.clear();
}
const auto minimal_context_filter = [](const context_t* ctx) {
return (ctx->callback_tracer || ctx->buffered_tracer);
};
for(const auto* itr : context::get_active_contexts(minimal_context_filter))
{
if(!itr) continue;
if constexpr(std::is_same<DomainIdx, rocprofiler_callback_tracing_kind_t>::value)
{
// if the given domain + op is not enabled, skip this context
if(context_filter(itr, domain_idx, operation_idx))
{
contexts.emplace_back(
callback_context_data{itr, rocprofiler_callback_tracing_record_t{}});
extern_corr_ids.emplace(itr, empty_user_data);
}
}
else if constexpr(std::is_same<DomainIdx, rocprofiler_buffer_tracing_kind_t>::value)
{
// if the given domain + op is not enabled, skip this context
if(context_filter(itr, domain_idx, operation_idx))
{
contexts.emplace_back(buffered_context_data{itr});
extern_corr_ids.emplace(itr, empty_user_data);
}
}
else
{
static_assert(common::mpl::assert_false<DomainIdx>::value,
"Error! invalid domain type");
}
}
}
template <typename ClearContainersT = std::false_type>
inline void
populate_contexts(rocprofiler_callback_tracing_kind_t callback_domain_idx,
@@ -350,6 +402,8 @@ execute_buffer_record_emplace(const buffered_context_data_vec_t& buffered_cont
buffer_v->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING, domain, record_v);
}
}
common::consume_args(ancestor_corr_id);
}
} // namespace tracing
} // namespace rocprofiler
@@ -91,7 +91,10 @@ def test_hip_api_trace_json(json_data):
def get_kind_name(kind_id):
return data["strings"]["buffer_records"][kind_id]["kind"]
valid_domain_names = ("HIP_RUNTIME_API",)
valid_domain_names = (
"HIP_RUNTIME_API",
"HIP_RUNTIME_API_EXT",
)
hip_api_data = data["buffer_records"]["hip_api"]
@@ -61,7 +61,10 @@ def test_hip_api_trace(json_data):
def get_kind_name(kind_id):
return data["strings"]["buffer_records"][kind_id]["kind"]
valid_domain_names = ("HIP_RUNTIME_API",)
valid_domain_names = (
"HIP_RUNTIME_API",
"HIP_RUNTIME_API_EXT",
)
hip_api_data = data["buffer_records"]["hip_api"]
@@ -37,7 +37,10 @@ def test_hip_api_trace(json_data):
def get_kind_name(kind_id):
return data["strings"]["buffer_records"][kind_id]["kind"]
valid_domain_names = ("HIP_RUNTIME_API",)
valid_domain_names = (
"HIP_RUNTIME_API",
"HIP_RUNTIME_API_EXT",
)
hip_api_data = data["buffer_records"]["hip_api"]
@@ -89,6 +89,8 @@ def test_api_trace(
assert row["Domain"] in [
"HIP_RUNTIME_API",
"HIP_COMPILER_API",
"HIP_RUNTIME_API_EXT",
"HIP_COMPILER_API_EXT",
]
assert int(row["Process_Id"]) > 0
assert int(row["Thread_Id"]) == 0 or int(row["Thread_Id"]) >= int(
@@ -210,6 +212,8 @@ def test_api_trace_json(json_data):
valid_hip_domain = [
"HIP_RUNTIME_API",
"HIP_COMPILER_API",
"HIP_RUNTIME_API_EXT",
"HIP_COMPILER_API_EXT",
]
valid_marker_domain = [