From 2c4e20b951e6f0889f1bc0b0e314478a6408579e Mon Sep 17 00:00:00 2001 From: "Welton, Benjamin" Date: Tue, 8 Jul 2025 23:41:22 -0700 Subject: [PATCH] [SWDEV-516561][1/2] Add MARKER_RANGE_EXTENT to capture ROCTX ranges (#363) * [SWDEV-516561][1/2] Add MARKER_RANGE_EXTENT to capture ROCTX ranges Range extent to capture all work between roctxpush/pop operations. Entry callback takes place during roctxpush and exit callback takes place in roctxpop. This is primarily to allow us to keep an ancestor id on the ancestor stack such that all operations that take place within the push/pop context can be annotated as being a part of this range. With the current setup (where push and pop are two separate operations that need to be combined externally), we cannot keep an ancestor id on the stack and thus cannot tie tracing events to particular ranges. Correlation id information is inherited from the push operation. Ancestor id needs to be added in a future commit that also outputs this ancestor to CSV. Output: ``` [ctest] {'size': 64, 'kind': 7, 'operation': 1, 'correlation_id': {'internal': 1525, 'external': 0, 'ancestor': 1524}, 'start_timestamp': 2932551479402642, 'end_timestamp': 2932551491178449, 'thread_id': 3254861} [ctest] {'size': 64, 'kind': 8, 'operation': 2, 'correlation_id': {'internal': 1525, 'external': 0, 'ancestor': 1524}, 'start_timestamp': 2932551479405878, 'end_timestamp': 2932551491181214, 'thread_id': 3254861} ``` Note: Kind 8 = range extent op. 
* Merge fix Revert several changes source/lib/rocprofiler-sdk/marker/range_marker.* - separate out range marker implementation from standard marker implementation Update public API with marker core range Support marker core range in sdk (source/lib/rocprofiler-sdk) Transition rocprofiler-sdk-tool and output lib to use marker core range Misc fixes for tests Fix logic in lib/output/generate{CSV,Stats}.cpp Update tests/rocprofv3/tracing-hip-in-libraries (marker validation) Fix test_otf2_data * Test fixes --------- Co-authored-by: Benjamin Welton --- .../rocprofiler-sdk/cxx/enum_string.hpp | 22 +- .../include/rocprofiler-sdk/cxx/perfetto.hpp | 2 + .../rocprofiler-sdk/external_correlation.h | 37 +- source/include/rocprofiler-sdk/fwd.h | 5 +- .../include/rocprofiler-sdk/marker/api_args.h | 9 + .../include/rocprofiler-sdk/marker/api_id.h | 10 + .../include/rocprofiler-sdk/marker/table_id.h | 1 + source/lib/output/generateCSV.cpp | 8 +- source/lib/output/generateOTF2.cpp | 8 +- source/lib/output/generatePerfetto.cpp | 4 +- source/lib/output/generateRocpd.cpp | 4 +- source/lib/output/generateStats.cpp | 8 +- source/lib/rocprofiler-sdk-tool/helper.hpp | 4 +- source/lib/rocprofiler-sdk-tool/tool.cpp | 136 ++- source/lib/rocprofiler-sdk/buffer_tracing.cpp | 12 + .../lib/rocprofiler-sdk/callback_tracing.cpp | 13 + .../rocprofiler-sdk/external_correlation.cpp | 1 + .../lib/rocprofiler-sdk/marker/CMakeLists.txt | 2 +- source/lib/rocprofiler-sdk/marker/defines.hpp | 220 +++++ source/lib/rocprofiler-sdk/marker/marker.cpp | 1 + .../lib/rocprofiler-sdk/marker/marker.def.cpp | 1 + source/lib/rocprofiler-sdk/marker/marker.hpp | 13 + .../rocprofiler-sdk/marker/range_marker.cpp | 846 ++++++++++++++++++ .../marker/range_marker.def.cpp | 73 ++ source/lib/rocprofiler-sdk/registration.cpp | 2 + source/lib/rocprofiler-sdk/tests/common.hpp | 1 + source/lib/rocprofiler-sdk/tests/naming.cpp | 3 + source/lib/rocprofiler-sdk/tests/roctx.cpp | 2 + tests/pytest-packages/tests/rocprofv3.py | 10 +- 
tests/python-bindings/validate.py | 8 +- tests/rocprofv3/python-bindings/validate.py | 8 +- tests/rocprofv3/roctracer-roctx/validate.py | 7 +- .../tracing-hip-in-libraries/validate.py | 8 +- tests/rocprofv3/tracing/validate.py | 8 +- tests/tools/json-tool.cpp | 23 +- 35 files changed, 1391 insertions(+), 129 deletions(-) create mode 100644 source/lib/rocprofiler-sdk/marker/range_marker.cpp create mode 100644 source/lib/rocprofiler-sdk/marker/range_marker.def.cpp diff --git a/source/include/rocprofiler-sdk/cxx/enum_string.hpp b/source/include/rocprofiler-sdk/cxx/enum_string.hpp index 143cd87279..8cfc6f1fad 100644 --- a/source/include/rocprofiler-sdk/cxx/enum_string.hpp +++ b/source/include/rocprofiler-sdk/cxx/enum_string.hpp @@ -1020,6 +1020,16 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_NAME_API_ID_roctxNameHipDevice); ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_NAME_API_ID_roctxNameHipStream); static_assert(ROCPROFILER_MARKER_NAME_API_ID_LAST == 4); +ROCPROFILER_ENUM_INFO(rocprofiler_marker_core_range_api_id_t, + 0, + ROCPROFILER_MARKER_CORE_RANGE_API_ID_LAST, + false) +ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId); +static_assert(ROCPROFILER_MARKER_CORE_RANGE_API_ID_LAST == 4); + // rocprofiler_ompt_operation_t ROCPROFILER_ENUM_INFO(rocprofiler_ompt_operation_t, 0, ROCPROFILER_OMPT_ID_LAST, false); ROCPROFILER_ENUM_LABEL(ROCPROFILER_OMPT_ID_thread_begin); @@ -1307,7 +1317,8 @@ static_assert(ROCPROFILER_HIP_TABLE_ID_LAST == 2); ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_TABLE_ID_RoctxCore); ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_TABLE_ID_RoctxControl); ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_TABLE_ID_RoctxName); -static_assert(ROCPROFILER_MARKER_TABLE_ID_LAST == 3); 
+ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange); +static_assert(ROCPROFILER_MARKER_TABLE_ID_LAST == 4); // rocprofiler_rccl_table_id_t ROCPROFILER_ENUM_LABEL(ROCPROFILER_RCCL_TABLE_ID); @@ -1393,6 +1404,7 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_HSA_FINALIZE_EXT_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT); @@ -1406,7 +1418,7 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION); ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_ROCJPEG_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_HIP_STREAM); -static_assert(ROCPROFILER_CALLBACK_TRACING_LAST == 21); +static_assert(ROCPROFILER_CALLBACK_TRACING_LAST == 22); // rocprofiler_buffer_tracing_kind_t ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_NONE); @@ -1417,6 +1429,7 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY); @@ -1441,7 +1454,7 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_EVENT_DROPPED_EVENTS); 
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_PAGE_MIGRATE); ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_PAGE_FAULT); ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_QUEUE); -static_assert(ROCPROFILER_BUFFER_TRACING_LAST == 32); +static_assert(ROCPROFILER_BUFFER_TRACING_LAST == 33); // rocprofiler_code_object_operation_t ROCPROFILER_ENUM_LABEL(ROCPROFILER_CODE_OBJECT_NONE); @@ -1637,7 +1650,8 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_OMPT); ROCPROFILER_ENUM_LABEL(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_ALLOCATION); ROCPROFILER_ENUM_LABEL(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCDECODE_API); ROCPROFILER_ENUM_LABEL(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCJPEG_API); -static_assert(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_LAST == 18); +ROCPROFILER_ENUM_LABEL(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_CORE_RANGE_API); +static_assert(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_LAST == 19); // rocprofiler_thread_trace_parameter_type_t ROCPROFILER_ENUM_LABEL(ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU); diff --git a/source/include/rocprofiler-sdk/cxx/perfetto.hpp b/source/include/rocprofiler-sdk/cxx/perfetto.hpp index d9ed56dca6..64006765b2 100644 --- a/source/include/rocprofiler-sdk/cxx/perfetto.hpp +++ b/source/include/rocprofiler-sdk/cxx/perfetto.hpp @@ -211,6 +211,7 @@ ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(HSA_FINALIZE_EXT_API, hsa_api) ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(HIP_RUNTIME_API, hip_api) ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(HIP_COMPILER_API, hip_api) ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(MARKER_CORE_API, marker_api) +ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(MARKER_CORE_RANGE_API, marker_api) ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(MARKER_CONTROL_API, marker_api) ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(MARKER_NAME_API, marker_api) ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(MEMORY_COPY, memory_copy) @@ -244,6 +245,7 @@ 
ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(HSA_FINALIZE_EXT_API, hsa_api) ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(HIP_RUNTIME_API, hip_api) ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(HIP_COMPILER_API, hip_api) ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(MARKER_CORE_API, marker_api) +ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(MARKER_CORE_RANGE_API, marker_api) ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(MARKER_CONTROL_API, marker_api) ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(MARKER_NAME_API, marker_api) ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(CODE_OBJECT, none) diff --git a/source/include/rocprofiler-sdk/external_correlation.h b/source/include/rocprofiler-sdk/external_correlation.h index 3711b83970..391fb8abf9 100644 --- a/source/include/rocprofiler-sdk/external_correlation.h +++ b/source/include/rocprofiler-sdk/external_correlation.h @@ -54,24 +54,25 @@ ROCPROFILER_EXTERN_C_INIT // NOLINTNEXTLINE(performance-enum-size) typedef enum ROCPROFILER_SDK_EXPERIMENTAL rocprofiler_external_correlation_id_request_kind_t { - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_NONE = 0, ///< Unknown kind - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_CORE_API, ///< - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_AMD_EXT_API, ///< - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_IMAGE_EXT_API, ///< - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_FINALIZE_EXT_API, ///< - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HIP_RUNTIME_API, ///< - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HIP_COMPILER_API, ///< - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_CORE_API, ///< - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_CONTROL_API, ///< - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_NAME_API, ///< - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY, ///< - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH, ///< - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_SCRATCH_MEMORY, ///< - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_RCCL_API, ///< - 
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_OMPT, ///< - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_ALLOCATION, ///< - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCDECODE_API, ///< - ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCJPEG_API, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_NONE = 0, ///< Unknown kind + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_CORE_API, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_AMD_EXT_API, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_IMAGE_EXT_API, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_FINALIZE_EXT_API, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HIP_RUNTIME_API, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HIP_COMPILER_API, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_CORE_API, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_CONTROL_API, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_NAME_API, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_SCRATCH_MEMORY, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_RCCL_API, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_OMPT, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_ALLOCATION, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCDECODE_API, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCJPEG_API, ///< + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_CORE_RANGE_API, ///< ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_LAST, } rocprofiler_external_correlation_id_request_kind_t; diff --git a/source/include/rocprofiler-sdk/fwd.h b/source/include/rocprofiler-sdk/fwd.h index b9d1ca72a1..12d1813214 100644 --- a/source/include/rocprofiler-sdk/fwd.h +++ b/source/include/rocprofiler-sdk/fwd.h @@ -175,6 +175,8 @@ typedef enum rocprofiler_callback_tracing_kind_t // NOLINT(performance-enum-siz ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API, ///< rocDecode API Tracing ROCPROFILER_CALLBACK_TRACING_ROCJPEG_API, 
///< rocJPEG API Tracing ROCPROFILER_CALLBACK_TRACING_HIP_STREAM, ///< @see ::rocprofiler_hip_stream_operation_t + ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API, ///< @see + ///< ::rocprofiler_marker_core_range_api_id_t ROCPROFILER_CALLBACK_TRACING_LAST, } rocprofiler_callback_tracing_kind_t; @@ -224,7 +226,8 @@ typedef enum rocprofiler_buffer_tracing_kind_t // NOLINT(performance-enum-size) ROCPROFILER_BUFFER_TRACING_KFD_PAGE_MIGRATE, ///< @see rocprofiler_kfd_page_migrate_operation_t ROCPROFILER_BUFFER_TRACING_KFD_PAGE_FAULT, ///< @see rocprofiler_kfd_page_fault_operation_t ROCPROFILER_BUFFER_TRACING_KFD_QUEUE, ///< @see rocprofiler_kfd_queue_operation_t - + ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API, ///< @see + ///< ::rocprofiler_marker_core_range_api_id_t ROCPROFILER_BUFFER_TRACING_LAST, /// @var ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT diff --git a/source/include/rocprofiler-sdk/marker/api_args.h b/source/include/rocprofiler-sdk/marker/api_args.h index aff3e10c0d..4be2432bca 100644 --- a/source/include/rocprofiler-sdk/marker/api_args.h +++ b/source/include/rocprofiler-sdk/marker/api_args.h @@ -101,6 +101,15 @@ typedef union rocprofiler_marker_api_args_t const char* name; const struct ihipStream_t* stream; } roctxNameHipStream; + struct + { + const char* message; + } roctxThreadRangeA; + struct + { + const char* message; + // roctx_range_id_t id; // only set when range ends in callback tracing + } roctxProcessRangeA; } rocprofiler_marker_api_args_t; ROCPROFILER_EXTERN_C_FINI diff --git a/source/include/rocprofiler-sdk/marker/api_id.h b/source/include/rocprofiler-sdk/marker/api_id.h index 2b30f19658..c2dc766a4c 100644 --- a/source/include/rocprofiler-sdk/marker/api_id.h +++ b/source/include/rocprofiler-sdk/marker/api_id.h @@ -54,3 +54,13 @@ typedef enum rocprofiler_marker_name_api_id_t // NOLINT(performance-enum-size) ROCPROFILER_MARKER_NAME_API_ID_roctxNameHipStream, ROCPROFILER_MARKER_NAME_API_ID_LAST, } rocprofiler_marker_name_api_id_t; + 
+typedef enum rocprofiler_marker_core_range_api_id_t // NOLINT(performance-enum-size) +{ + ROCPROFILER_MARKER_CORE_RANGE_API_ID_NONE = -1, + ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA = 0, + ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA, + ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA, + ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId, + ROCPROFILER_MARKER_CORE_RANGE_API_ID_LAST, +} rocprofiler_marker_core_range_api_id_t; diff --git a/source/include/rocprofiler-sdk/marker/table_id.h b/source/include/rocprofiler-sdk/marker/table_id.h index 07cf3e406e..a27fade7f0 100644 --- a/source/include/rocprofiler-sdk/marker/table_id.h +++ b/source/include/rocprofiler-sdk/marker/table_id.h @@ -29,5 +29,6 @@ typedef enum rocprofiler_marker_table_id_t ROCPROFILER_MARKER_TABLE_ID_RoctxCore = 0, ROCPROFILER_MARKER_TABLE_ID_RoctxControl, ROCPROFILER_MARKER_TABLE_ID_RoctxName, + ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange, ROCPROFILER_MARKER_TABLE_ID_LAST, } rocprofiler_marker_table_id_t; diff --git a/source/lib/output/generateCSV.cpp b/source/lib/output/generateCSV.cpp index 68363c9d7f..e8dacc9511 100644 --- a/source/lib/output/generateCSV.cpp +++ b/source/lib/output/generateCSV.cpp @@ -537,10 +537,10 @@ generate_csv(const output_config& cf auto row_ss = std::stringstream{}; auto _name = std::string_view{}; - if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API && - (record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA || - record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA || - record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA)) + if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API && + (record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA || + record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA || + record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA)) { _name = tool_metadata.get_marker_message(record.correlation_id.internal); 
} diff --git a/source/lib/output/generateOTF2.cpp b/source/lib/output/generateOTF2.cpp index f0ace1ff5c..7c31f8b187 100644 --- a/source/lib/output/generateOTF2.cpp +++ b/source/lib/output/generateOTF2.cpp @@ -578,8 +578,8 @@ write_otf2(const output_config& cfg, if(!_inp) return; for(auto itr : *_inp) { - if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API && - itr.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA) + if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API && + itr.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA) continue; using value_type = common::mpl::unqualified_type_t; @@ -589,8 +589,8 @@ write_otf2(const output_config& cfg, rocprofiler_buffer_tracing_marker_api_record_t>::value) { paradigm = OTF2_PARADIGM_USER; - if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API && - itr.operation != ROCPROFILER_MARKER_CORE_API_ID_roctxGetThreadId) + if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API && + itr.operation != ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId) name = tool_metadata.get_marker_message(itr.correlation_id.internal); } diff --git a/source/lib/output/generatePerfetto.cpp b/source/lib/output/generatePerfetto.cpp index b807bfd09e..cea6feb910 100644 --- a/source/lib/output/generatePerfetto.cpp +++ b/source/lib/output/generatePerfetto.cpp @@ -392,8 +392,8 @@ write_perfetto( for(auto itr : marker_api_gen.get(ditr)) { auto& track = thread_tracks.at(itr.thread_id); - auto name = (itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API && - itr.operation != ROCPROFILER_MARKER_CORE_API_ID_roctxGetThreadId) + auto name = (itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API && + itr.operation != ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId) ? 
tool_metadata.get_marker_message(itr.correlation_id.internal) : buffer_names.at(itr.kind, itr.operation); diff --git a/source/lib/output/generateRocpd.cpp b/source/lib/output/generateRocpd.cpp index 6567767f45..c30dfc5b66 100644 --- a/source/lib/output/generateRocpd.cpp +++ b/source/lib/output/generateRocpd.cpp @@ -1249,10 +1249,10 @@ write_rocpd( auto name = tool_metadata.buffer_names.at(itr.kind, itr.operation); auto msg = std::string{"{}"}; - if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API) + if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API) { if(static_cast(itr.operation) != - ROCPROFILER_MARKER_CORE_API_ID_roctxGetThreadId) + ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId) { // check generatePerfetto.cpp and generateOTF2.cpp, and the marker name in // the view diff --git a/source/lib/output/generateStats.cpp b/source/lib/output/generateStats.cpp index 13193c9a5f..bc6bd52955 100644 --- a/source/lib/output/generateStats.cpp +++ b/source/lib/output/generateStats.cpp @@ -147,10 +147,10 @@ generate_stats(const output_config& /*cfg*/, { auto _name = std::string_view{}; - if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API && - (record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA || - record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA || - record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA)) + if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API && + (record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA || + record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA || + record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA)) { _name = tool_metadata.get_marker_message(record.correlation_id.internal); } diff --git a/source/lib/rocprofiler-sdk-tool/helper.hpp b/source/lib/rocprofiler-sdk-tool/helper.hpp index 27acc7d2da..a7f9272805 100644 --- a/source/lib/rocprofiler-sdk-tool/helper.hpp +++ 
b/source/lib/rocprofiler-sdk-tool/helper.hpp @@ -124,8 +124,8 @@ struct marker_tracing_kind_conversion; }; MAP_TRACING_KIND_CONVERSION(MARKER_API_CORE, - ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API, - ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API) + ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API, + ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API) MAP_TRACING_KIND_CONVERSION(MARKER_API_CONTROL, ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API, ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API) diff --git a/source/lib/rocprofiler-sdk-tool/tool.cpp b/source/lib/rocprofiler-sdk-tool/tool.cpp index 00c80ec8e4..aaab7bf80b 100644 --- a/source/lib/rocprofiler-sdk-tool/tool.cpp +++ b/source/lib/rocprofiler-sdk-tool/tool.cpp @@ -567,7 +567,7 @@ kernel_rename_callback(rocprofiler_callback_tracing_record_t record, { if(!tool::get_config().kernel_rename || thread_dispatch_rename == nullptr) return; - if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API) + if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API) { auto* marker_data = static_cast(record.payload); @@ -576,22 +576,22 @@ kernel_rename_callback(rocprofiler_callback_tracing_record_t record, return std::string_view{*common::get_string_entry(_hash_v)}; }; - if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA && + if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA && record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT && marker_data->args.roctxMarkA.message) { thread_dispatch_rename->emplace(tool_metadata->add_kernel_rename_val( add_message(marker_data->args.roctxMarkA.message), record.correlation_id.internal)); } - else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA && - record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT && - marker_data->args.roctxRangePushA.message) + else if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA && + record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER && + 
marker_data->args.roctxThreadRangeA.message) { thread_dispatch_rename->emplace(tool_metadata->add_kernel_rename_val( - add_message(marker_data->args.roctxRangePushA.message), + add_message(marker_data->args.roctxThreadRangeA.message), record.correlation_id.internal)); } - else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePop && - record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) + else if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA && + record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) { ROCP_FATAL_IF(thread_dispatch_rename->empty()) << "roctxRangePop invoked more times than roctxRangePush on thread " @@ -600,6 +600,12 @@ kernel_rename_callback(rocprofiler_callback_tracing_record_t record, thread_dispatch_rename->pop(); } } + else + { + ROCP_CI_LOG(INFO) << fmt::format( + "Unsupported operation for {}", + tool_metadata->get_operation_name(record.kind, record.operation)); + } common::consume_args(user_data, data); } @@ -696,12 +702,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record, rocprofiler_user_data_t* user_data, void* data) { - static thread_local auto stacked_range = - std::vector{}; - static auto global_range = common::Synchronized< - std::unordered_map>{}; - - if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API) + if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API) { auto* marker_data = static_cast(record.payload); @@ -709,7 +710,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record, auto ts = rocprofiler_timestamp_t{}; rocprofiler_get_timestamp(&ts); - if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA) + if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA) { if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) { @@ -728,83 +729,60 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record, tool::write_ring_buffer(marker_record, domain_type::MARKER); } } - else if(record.operation == 
ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA) + else if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA) { - if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) + if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) { - if(marker_data->args.roctxRangePushA.message) + user_data->value = ts; + + if(marker_data->args.roctxThreadRangeA.message) { CHECK_NOTNULL(tool_metadata) ->add_marker_message( record.correlation_id.internal, - std::string{marker_data->args.roctxRangePushA.message}); - - auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{}; - marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t); - marker_record.kind = convert_marker_tracing_kind(record.kind); - marker_record.operation = record.operation; - marker_record.thread_id = record.thread_id; - marker_record.correlation_id = record.correlation_id; - marker_record.start_timestamp = ts; - marker_record.end_timestamp = 0; - - stacked_range.emplace_back(marker_record); + std::string{marker_data->args.roctxThreadRangeA.message}); } } - } - else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePop) - { - if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) + else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) { - ROCP_FATAL_IF(stacked_range.empty()) - << "roctxRangePop invoked more times than roctxRangePush on thread " - << rocprofiler::common::get_tid(); - - auto val = stacked_range.back(); - stacked_range.pop_back(); - - val.end_timestamp = ts; - tool::write_ring_buffer(val, domain_type::MARKER); - } - } - else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA) - { - if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT && - marker_data->args.roctxRangeStartA.message) - { - CHECK_NOTNULL(tool_metadata) - ->add_marker_message(record.correlation_id.internal, - std::string{marker_data->args.roctxRangeStartA.message}); - auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{}; marker_record.size = 
sizeof(rocprofiler_buffer_tracing_marker_api_record_t); marker_record.kind = convert_marker_tracing_kind(record.kind); marker_record.operation = record.operation; marker_record.thread_id = record.thread_id; marker_record.correlation_id = record.correlation_id; - marker_record.start_timestamp = ts; - marker_record.end_timestamp = 0; + marker_record.start_timestamp = user_data->value; + marker_record.end_timestamp = ts; - auto _id = marker_data->retval.roctx_range_id_t_retval; - global_range.wlock( - [](auto& map, roctx_range_id_t _range_id, auto&& _record) { - map.emplace(_range_id, std::move(_record)); - }, - _id, - marker_record); + tool::write_ring_buffer(marker_record, domain_type::MARKER); } } - else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStop) + else if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA) { if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER) { - auto _id = marker_data->args.roctxRangeStop.id; - auto&& _entry = global_range.rlock( - [](const auto& map, auto _key) { return map.at(_key); }, _id); + user_data->value = ts; - _entry.end_timestamp = ts; - tool::write_ring_buffer(_entry, domain_type::MARKER); - global_range.wlock([](auto& map, auto _key) { return map.erase(_key); }, _id); + if(marker_data->args.roctxProcessRangeA.message) + { + CHECK_NOTNULL(tool_metadata) + ->add_marker_message( + record.correlation_id.internal, + std::string{marker_data->args.roctxProcessRangeA.message}); + } + } + else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT) + { + auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{}; + marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t); + marker_record.kind = convert_marker_tracing_kind(record.kind); + marker_record.operation = record.operation; + marker_record.thread_id = record.thread_id; + marker_record.correlation_id = record.correlation_id; + marker_record.start_timestamp = user_data->value; + marker_record.end_timestamp = 
ts; + + tool::write_ring_buffer(marker_record, domain_type::MARKER); } } else @@ -827,6 +805,12 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record, } } } + else + { + ROCP_CI_LOG(INFO) << fmt::format( + "Unsupported operation for {}", + tool_metadata->get_operation_name(record.kind, record.operation)); + } (void) data; } @@ -1737,7 +1721,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) { ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service( get_client_ctx(), - ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API, + ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API, nullptr, 0, callbacks.callback_tracing, @@ -1971,16 +1955,16 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) if(tool::get_config().kernel_rename) { auto rename_ctx = rocprofiler_context_id_t{0}; - auto marker_core_api_kinds = std::array{ - ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA, - ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA, - ROCPROFILER_MARKER_CORE_API_ID_roctxRangePop}; + auto marker_core_api_kinds = std::array{ + ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA, + ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA, + }; ROCPROFILER_CALL(rocprofiler_create_context(&rename_ctx), "failed to create context"); ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service( rename_ctx, - ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API, + ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API, marker_core_api_kinds.data(), marker_core_api_kinds.size(), callbacks.kernel_rename, diff --git a/source/lib/rocprofiler-sdk/buffer_tracing.cpp b/source/lib/rocprofiler-sdk/buffer_tracing.cpp index e680cd1693..a5104517f5 100644 --- a/source/lib/rocprofiler-sdk/buffer_tracing.cpp +++ b/source/lib/rocprofiler-sdk/buffer_tracing.cpp @@ -110,6 +110,7 @@ ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_EVENT_DROPPED_EVENTS) ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_PAGE_MIGRATE) ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_PAGE_FAULT) 
ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_QUEUE) +ROCPROFILER_BUFFER_TRACING_KIND_STRING(MARKER_CORE_RANGE_API) template std::pair @@ -334,6 +335,12 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_ val = rocprofiler::hip::stream::name_by_id(operation); break; } + case ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API: + { + val = rocprofiler::marker::name_by_id( + operation); + break; + } case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE: case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT: case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE: @@ -490,6 +497,11 @@ rocprofiler_iterate_buffer_tracing_kind_operations( ops = rocprofiler::hip::stream::get_ids(); break; } + case ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API: + { + ops = rocprofiler::marker::get_ids(); + break; + } case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE: case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT: case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE: diff --git a/source/lib/rocprofiler-sdk/callback_tracing.cpp b/source/lib/rocprofiler-sdk/callback_tracing.cpp index d695c1bc20..99d8a5a9cb 100644 --- a/source/lib/rocprofiler-sdk/callback_tracing.cpp +++ b/source/lib/rocprofiler-sdk/callback_tracing.cpp @@ -95,6 +95,7 @@ ROCPROFILER_CALLBACK_TRACING_KIND_STRING(RUNTIME_INITIALIZATION) ROCPROFILER_CALLBACK_TRACING_KIND_STRING(ROCDECODE_API) ROCPROFILER_CALLBACK_TRACING_KIND_STRING(ROCJPEG_API) ROCPROFILER_CALLBACK_TRACING_KIND_STRING(HIP_STREAM) +ROCPROFILER_CALLBACK_TRACING_KIND_STRING(MARKER_CORE_RANGE_API) template std::pair @@ -294,6 +295,12 @@ rocprofiler_query_callback_tracing_kind_operation_name(rocprofiler_callback_trac val = rocprofiler::hip::stream::name_by_id(operation); break; } + case ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API: + { + val = rocprofiler::marker::name_by_id( + operation); + break; + } }; if(!val) @@ -438,6 +445,11 @@ rocprofiler_iterate_callback_tracing_kind_operations( ops = rocprofiler::hip::stream::get_ids(); 
break; } + case ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API: + { + ops = rocprofiler::marker::get_ids(); + break; + } }; for(const auto& itr : ops) @@ -513,6 +525,7 @@ rocprofiler_iterate_callback_tracing_kind_operation_args( user_data); return ROCPROFILER_STATUS_SUCCESS; } + case ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API: case ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API: { rocprofiler::marker::iterate_args( diff --git a/source/lib/rocprofiler-sdk/external_correlation.cpp b/source/lib/rocprofiler-sdk/external_correlation.cpp index 46af922fce..ecf082d7a3 100644 --- a/source/lib/rocprofiler-sdk/external_correlation.cpp +++ b/source/lib/rocprofiler-sdk/external_correlation.cpp @@ -66,6 +66,7 @@ ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KIND_STRING(RCCL_API) ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KIND_STRING(OMPT) ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KIND_STRING(ROCDECODE_API) ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KIND_STRING(ROCJPEG_API) +ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KIND_STRING(MARKER_CORE_RANGE_API) #undef ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KIND_STRING diff --git a/source/lib/rocprofiler-sdk/marker/CMakeLists.txt b/source/lib/rocprofiler-sdk/marker/CMakeLists.txt index d2c92a5fa6..00453648bb 100644 --- a/source/lib/rocprofiler-sdk/marker/CMakeLists.txt +++ b/source/lib/rocprofiler-sdk/marker/CMakeLists.txt @@ -1,6 +1,6 @@ # # -set(ROCPROFILER_LIB_MARKER_SOURCES marker.cpp) +set(ROCPROFILER_LIB_MARKER_SOURCES marker.cpp range_marker.cpp) set(ROCPROFILER_LIB_MARKER_HEADERS defines.hpp marker.hpp utils.hpp) target_sources(rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_MARKER_SOURCES} diff --git a/source/lib/rocprofiler-sdk/marker/defines.hpp b/source/lib/rocprofiler-sdk/marker/defines.hpp index bfa7499c6c..8a98d1d608 100644 --- a/source/lib/rocprofiler-sdk/marker/defines.hpp +++ b/source/lib/rocprofiler-sdk/marker/defines.hpp @@ -32,6 +32,7 @@ template <> \ struct roctx_api_info : roctx_domain_info \ { \ + static 
constexpr auto is_range = false; \ static constexpr auto table_idx = MARKER_TABLE; \ static constexpr auto operation_idx = MARKER_API_ID; \ static constexpr auto name = #MARKER_FUNC; \ @@ -111,6 +112,7 @@ template <> \ struct roctx_api_info : roctx_domain_info \ { \ + static constexpr auto is_range = false; \ static constexpr auto table_idx = MARKER_TABLE; \ static constexpr auto operation_idx = MARKER_API_ID; \ static constexpr auto name = #MARKER_FUNC; \ @@ -188,6 +190,199 @@ } \ } +#define MARKER_EVENT_API_INFO_DEFINITION_V( \ + MARKER_TABLE, MARKER_API_ID, MARKER_NAME, MARKER_FUNC_PTR, ...) \ + namespace rocprofiler \ + { \ + namespace marker \ + { \ + template <> \ + struct roctx_api_info : roctx_domain_info \ + { \ + static constexpr auto is_range = false; \ + static constexpr auto table_idx = MARKER_TABLE; \ + static constexpr auto operation_idx = MARKER_API_ID; \ + static constexpr auto name = #MARKER_NAME; \ + \ + using domain_type = roctx_domain_info; \ + using this_type = roctx_api_info; \ + using base_type = roctx_api_impl; \ + \ + static constexpr auto callback_domain_idx = domain_type::callback_domain_idx; \ + static constexpr auto buffered_domain_idx = domain_type::buffered_domain_idx; \ + \ + using domain_type::args_type; \ + using domain_type::retval_type; \ + using domain_type::callback_data_type; \ + \ + static constexpr auto offset() \ + { \ + return offsetof(roctx_table_lookup::type, MARKER_FUNC_PTR); \ + } \ + \ + static auto& get_table() { return roctx_table_lookup{}(); } \ + \ + template \ + static auto& get_table(TableT& _v) \ + { \ + return roctx_table_lookup{}(_v); \ + } \ + \ + template \ + static auto& get_table_func(TableT& _table) \ + { \ + if constexpr(std::is_pointer::value) \ + { \ + assert(_table != nullptr && "nullptr to MARKER table for " #MARKER_NAME \ + " function"); \ + return _table->MARKER_FUNC_PTR; \ + } \ + else \ + { \ + return _table.MARKER_FUNC_PTR; \ + } \ + } \ + \ + static auto& get_table_func() { return 
get_table_func(get_table()); } \ + \ + template \ + static auto& get_api_data_args(DataT& _data) \ + { \ + return _data.MARKER_NAME; \ + } \ + \ + template \ + static auto get_functor(RetT (*)(Args...)) \ + { \ + return &base_type::functor; \ + } \ + \ + static std::vector as_arg_addr(callback_data_type trace_data) \ + { \ + return std::vector{ \ + GET_ADDR_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)}; \ + } \ + \ + static auto as_arg_list(callback_data_type trace_data, int32_t max_deref) \ + { \ + return utils::stringize( \ + max_deref, \ + GET_NAMED_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)); \ + } \ + }; \ + } \ + } + +#define MARKER_RANGE_API_INFO_DEFINITION_V( \ + MARKER_TABLE, MARKER_API_ID, MARKER_NAME, MARKER_PUSH_FUNC_PTR, MARKER_POP_FUNC_PTR, ...) \ + namespace rocprofiler \ + { \ + namespace marker \ + { \ + template <> \ + struct roctx_api_info : roctx_domain_info \ + { \ + static constexpr auto is_range = true; \ + static constexpr auto table_idx = MARKER_TABLE; \ + static constexpr auto operation_idx = MARKER_API_ID; \ + static constexpr auto name = #MARKER_NAME; \ + \ + using domain_type = roctx_domain_info; \ + using this_type = roctx_api_info; \ + using base_type = roctx_api_impl; \ + \ + static constexpr auto callback_domain_idx = domain_type::callback_domain_idx; \ + static constexpr auto buffered_domain_idx = domain_type::buffered_domain_idx; \ + \ + using domain_type::args_type; \ + using domain_type::retval_type; \ + using domain_type::callback_data_type; \ + \ + static constexpr auto push_offset() \ + { \ + return offsetof(roctx_table_lookup::type, MARKER_PUSH_FUNC_PTR); \ + } \ + \ + static constexpr auto pop_offset() \ + { \ + return offsetof(roctx_table_lookup::type, MARKER_POP_FUNC_PTR); \ + } \ + \ + static auto& get_table() { return roctx_table_lookup{}(); } \ + \ + template \ + static auto& get_table(TableT& _v) \ + { \ + return roctx_table_lookup{}(_v); \ + } \ + \ + template \ + static auto& 
get_push_table_func(TableT& _table) \ + { \ + if constexpr(std::is_pointer::value) \ + { \ + assert(_table != nullptr && "nullptr to MARKER table for " #MARKER_NAME \ + " function"); \ + return _table->MARKER_PUSH_FUNC_PTR; \ + } \ + else \ + { \ + return _table.MARKER_PUSH_FUNC_PTR; \ + } \ + } \ + \ + template \ + static auto& get_pop_table_func(TableT& _table) \ + { \ + if constexpr(std::is_pointer::value) \ + { \ + assert(_table != nullptr && "nullptr to MARKER table for " #MARKER_NAME \ + " function"); \ + return _table->MARKER_POP_FUNC_PTR; \ + } \ + else \ + { \ + return _table.MARKER_POP_FUNC_PTR; \ + } \ + } \ + \ + static auto& get_push_table_func() { return get_push_table_func(get_table()); } \ + static auto& get_pop_table_func() { return get_pop_table_func(get_table()); } \ + \ + template \ + static auto& get_api_data_args(DataT& _data) \ + { \ + return _data.MARKER_NAME; \ + } \ + \ + template \ + static auto get_push_functor(RetT (*)(Args...)) \ + { \ + return &base_type::push_functor; \ + } \ + \ + template \ + static auto get_pop_functor(RetT (*)(Args...)) \ + { \ + return &base_type::pop_functor; \ + } \ + \ + static std::vector as_arg_addr(callback_data_type trace_data) \ + { \ + return std::vector{ \ + GET_ADDR_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)}; \ + } \ + \ + static auto as_arg_list(callback_data_type trace_data, int32_t max_deref) \ + { \ + return utils::stringize( \ + max_deref, \ + GET_NAMED_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)); \ + } \ + }; \ + } \ + } + #define MARKER_API_TABLE_LOOKUP_DEFINITION(TABLE_ID, TYPE) \ namespace rocprofiler \ { \ @@ -218,3 +413,28 @@ }; \ } \ } + +#define MARKER_API_TABLE_LOOKUP_DEFINITION_ALT(TABLE_ID, TYPE) \ + namespace rocprofiler \ + { \ + namespace marker \ + { \ + namespace \ + { \ + template <> \ + auto* get_table() \ + { \ + return get_table_impl(); \ + } \ + } \ + \ + template <> \ + struct roctx_table_lookup \ + { \ + using type = TYPE; \ + auto& 
operator()(type& _v) const { return _v; } \ + auto& operator()(type* _v) const { return *_v; } \ + auto& operator()() const { return (*this)(get_table()); } \ + }; \ + } \ + } diff --git a/source/lib/rocprofiler-sdk/marker/marker.cpp b/source/lib/rocprofiler-sdk/marker/marker.cpp index 00f9135156..8be6dcb5cf 100644 --- a/source/lib/rocprofiler-sdk/marker/marker.cpp +++ b/source/lib/rocprofiler-sdk/marker/marker.cpp @@ -26,6 +26,7 @@ #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/context/correlation_id.hpp" #include "lib/rocprofiler-sdk/marker/utils.hpp" #include "lib/rocprofiler-sdk/registration.hpp" #include "lib/rocprofiler-sdk/tracing/tracing.hpp" diff --git a/source/lib/rocprofiler-sdk/marker/marker.def.cpp b/source/lib/rocprofiler-sdk/marker/marker.def.cpp index cb8f5259be..a6899a879b 100644 --- a/source/lib/rocprofiler-sdk/marker/marker.def.cpp +++ b/source/lib/rocprofiler-sdk/marker/marker.def.cpp @@ -23,6 +23,7 @@ #include "lib/rocprofiler-sdk/marker/defines.hpp" #include "lib/rocprofiler-sdk/marker/marker.hpp" +#include #include #include diff --git a/source/lib/rocprofiler-sdk/marker/marker.hpp b/source/lib/rocprofiler-sdk/marker/marker.hpp index 485a44a851..ef3d5e6019 100644 --- a/source/lib/rocprofiler-sdk/marker/marker.hpp +++ b/source/lib/rocprofiler-sdk/marker/marker.hpp @@ -63,6 +63,12 @@ struct roctx_api_impl : roctx_domain_info template static RetT functor(Args... args); + + template + static RetT push_functor(Args... args); + + template + static RetT pop_functor(Args... 
args); }; template @@ -96,5 +102,12 @@ copy_table(TableT* _orig, uint64_t _tbl_instance); template void update_table(TableT* _orig); + +namespace range +{ +template +void +update_table(TableT* _orig, uint64_t _tbl_instance); +} } // namespace marker } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/marker/range_marker.cpp b/source/lib/rocprofiler-sdk/marker/range_marker.cpp new file mode 100644 index 0000000000..9c179ad3c9 --- /dev/null +++ b/source/lib/rocprofiler-sdk/marker/range_marker.cpp @@ -0,0 +1,846 @@ +// MIT License +// +// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "lib/common/defines.hpp" +#include "lib/common/static_object.hpp" +#include "lib/common/utility.hpp" +#include "lib/rocprofiler-sdk/buffer.hpp" +#include "lib/rocprofiler-sdk/context/context.hpp" +#include "lib/rocprofiler-sdk/context/correlation_id.hpp" +#include "lib/rocprofiler-sdk/marker/marker.hpp" +#include "lib/rocprofiler-sdk/marker/utils.hpp" +#include "lib/rocprofiler-sdk/registration.hpp" +#include "lib/rocprofiler-sdk/tracing/tracing.hpp" + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace rocprofiler +{ +namespace marker +{ +namespace +{ +struct null_type +{}; + +template +auto +get_default_retval() +{ + if constexpr(std::is_integral::value) + return Tp{0}; + else + static_assert(std::is_empty::value, "Error! unsupported return type"); +} + +template +void +set_data_retval(DataT& _data, [[maybe_unused]] Tp _val) +{ + if constexpr(std::is_same::value) + _data.int32_t_retval = _val; + else if constexpr(std::is_same::value) + _data.int64_t_retval = _val; + else if constexpr(std::is_same::value) + _data.roctx_range_id_t_retval = _val; + else + static_assert(std::is_empty::value, "Error! 
unsupported return type"); +} + +template +Tp* +get_table_impl() +{ + static auto*& _v = common::static_object::construct(common::init_public_api_struct(Tp{})); + return _v; +} + +template +auto* +get_table(); + +struct range_data_t : public tracing::tracing_data +{ + using callback_api_data_t = rocprofiler_callback_tracing_marker_api_data_t; + using buffered_api_data_t = rocprofiler_buffer_tracing_marker_api_record_t; + + callback_api_data_t callback_data = common::init_public_api_struct(callback_api_data_t{}); + buffered_api_data_t buffer_record = common::init_public_api_struct(buffered_api_data_t{}); + context::correlation_id* corr_id = nullptr; + rocprofiler_thread_id_t thread_id = common::get_tid(); +}; + +auto& +get_range_thread_stack() +{ + static thread_local auto push_op_stack = common::container::small_vector{}; + return push_op_stack; +} + +auto& +get_range_process_stack() +{ + static auto push_op_stack = + common::Synchronized>{}; + return push_op_stack; +} +} // namespace + +template +template +auto +roctx_api_impl::set_data_args(DataArgsT& _data_args, Args... args) +{ + if constexpr(sizeof...(Args) == 0) + _data_args.no_args.empty = '\0'; + else + _data_args = DataArgsT{args...}; +} + +template +template +auto +roctx_api_impl::exec(FuncT&& _func, Args&&... args) +{ + using return_type = std::decay_t>; + + if(_func) + { + if constexpr(std::is_void::value) + { + _func(std::forward(args)...); + return null_type{}; + } + else + { + return _func(std::forward(args)...); + } + } + + using info_type = roctx_api_info; + ROCP_ERROR << "nullptr to next roctx function for " << info_type::name << " (" + << info_type::operation_idx << ")"; + + if constexpr(std::is_void::value) + return null_type{}; + else + return get_default_retval(); +} + +template +template +RetT +roctx_api_impl::functor(Args... 
args) +{ + using info_type = roctx_api_info; + using callback_api_data_t = typename roctx_domain_info::callback_data_type; + using buffered_api_data_t = typename roctx_domain_info::buffer_data_type; + + constexpr auto external_corr_id_domain_idx = + roctx_domain_info::external_correlation_id_domain_idx; + + ROCP_INFO_IF(registration::get_fini_status() != 0) << "Executing " << info_type::name; + + auto thr_id = common::get_tid(); + auto callback_contexts = tracing::callback_context_data_vec_t{}; + auto buffered_contexts = tracing::buffered_context_data_vec_t{}; + auto external_corr_ids = tracing::external_correlation_id_map_t{}; + + tracing::populate_contexts(info_type::callback_domain_idx, + info_type::buffered_domain_idx, + info_type::operation_idx, + callback_contexts, + buffered_contexts, + external_corr_ids); + + if(callback_contexts.empty() && buffered_contexts.empty()) + { + [[maybe_unused]] auto _ret = exec(info_type::get_table_func(), std::forward(args)...); + if constexpr(!std::is_void::value) + return _ret; + else + return; + } + + auto ref_count = 2; + auto buffer_record = common::init_public_api_struct(buffered_api_data_t{}); + auto callback_data = common::init_public_api_struct(callback_api_data_t{}); + auto* corr_id = tracing::correlation_service::construct(ref_count); + auto internal_corr_id = corr_id->internal; + auto ancestor_corr_id = corr_id->ancestor; + + tracing::populate_external_correlation_ids(external_corr_ids, + thr_id, + external_corr_id_domain_idx, + info_type::operation_idx, + internal_corr_id); + + // invoke the callbacks + if(!callback_contexts.empty()) + { + set_data_args(info_type::get_api_data_args(callback_data.args), + std::forward(args)...); + + tracing::execute_phase_enter_callbacks(callback_contexts, + thr_id, + internal_corr_id, + external_corr_ids, + ancestor_corr_id, + info_type::callback_domain_idx, + info_type::operation_idx, + callback_data); + } + + // enter callback may update the external correlation id field + 
tracing::update_external_correlation_ids( + external_corr_ids, thr_id, external_corr_id_domain_idx); + + // record the start timestamp as close to the function call as possible + if(!buffered_contexts.empty()) + { + buffer_record.start_timestamp = common::timestamp_ns(); + } + + // decrement the reference count before invoking + corr_id->sub_ref_count(); + + auto _ret = exec(info_type::get_table_func(), std::forward(args)...); + + // record the end timestamp as close to the function call as possible + if(!buffered_contexts.empty()) + { + buffer_record.end_timestamp = common::timestamp_ns(); + } + + if(!callback_contexts.empty()) + { + set_data_retval(callback_data.retval, _ret); + + tracing::execute_phase_exit_callbacks(callback_contexts, + external_corr_ids, + info_type::callback_domain_idx, + info_type::operation_idx, + callback_data); + } + + if(!buffered_contexts.empty()) + { + tracing::execute_buffer_record_emplace(buffered_contexts, + thr_id, + internal_corr_id, + external_corr_ids, + ancestor_corr_id, + info_type::buffered_domain_idx, + info_type::operation_idx, + buffer_record); + } + + // decrement the reference count after usage in the callback/buffers + corr_id->sub_ref_count(); + + context::pop_latest_correlation_id(corr_id); + + if constexpr(!std::is_void::value) return _ret; +} + +template +template +RetT +roctx_api_impl::push_functor(Args... 
args) +{ + using info_type = roctx_api_info; + + constexpr auto external_corr_id_domain_idx = + roctx_domain_info::external_correlation_id_domain_idx; + + ROCP_INFO_IF(registration::get_fini_status() != 0) << "Executing " << info_type::name; + + auto thr_id = common::get_tid(); + auto range_data = range_data_t{}; + auto& external_corr_ids = range_data.external_correlation_ids; + + tracing::populate_contexts(info_type::callback_domain_idx, + info_type::buffered_domain_idx, + info_type::operation_idx, + range_data); + + if(range_data.empty()) + { + [[maybe_unused]] auto _ret = + exec(info_type::get_push_table_func(), std::forward(args)...); + if constexpr(!std::is_void::value) + return _ret; + else + return; + } + + auto ref_count = 1; + auto& buffer_record = range_data.buffer_record; + auto& callback_data = range_data.callback_data; + auto*& corr_id = range_data.corr_id; + + corr_id = tracing::correlation_service::construct(ref_count); + auto internal_corr_id = corr_id->internal; + auto ancestor_corr_id = corr_id->ancestor; + + tracing::populate_external_correlation_ids(external_corr_ids, + thr_id, + external_corr_id_domain_idx, + info_type::operation_idx, + internal_corr_id); + + // invoke the callbacks + if(!range_data.callback_contexts.empty()) + { + set_data_args(info_type::get_api_data_args(callback_data.args), + std::forward(args)...); + + tracing::execute_phase_enter_callbacks(range_data.callback_contexts, + thr_id, + internal_corr_id, + external_corr_ids, + ancestor_corr_id, + info_type::callback_domain_idx, + info_type::operation_idx, + callback_data); + } + + // enter callback may update the external correlation id field + tracing::update_external_correlation_ids( + external_corr_ids, thr_id, external_corr_id_domain_idx); + + // record the start timestamp as close to the function call as possible + if(!range_data.buffered_contexts.empty()) + { + buffer_record.start_timestamp = common::timestamp_ns(); + } + + auto _ret = 
exec(info_type::get_push_table_func(), std::forward(args)...); + + if(!range_data.callback_contexts.empty()) + { + set_data_retval(callback_data.retval, _ret); + } + + if constexpr(OpIdx == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA) + { + get_range_thread_stack().emplace_back(std::move(range_data)); + } + else if constexpr(OpIdx == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA) + { + // push the range data to the process stack + get_range_process_stack().wlock( + [](auto& _stack, auto _key, auto&& _range_data) { + _stack.emplace(_key, std::move(_range_data)); + }, + _ret, + std::move(range_data)); + } + + if constexpr(!std::is_void::value) return _ret; +} + +template +template +RetT +roctx_api_impl::pop_functor(Args... args) +{ + using info_type = roctx_api_info; + + auto range_data = range_data_t{}; + + if constexpr(OpIdx == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA) + { + if(auto& _range_stack = get_range_thread_stack(); !_range_stack.empty()) + { + // if the range API is used, we need to use the range tracing data + // for push/pop operations, otherwise we can use the main API tracing + range_data = _range_stack.back(); + _range_stack.pop_back(); + } + } + else if constexpr(OpIdx == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA) + { + auto range_id = std::get<0>(std::tie(args...)); + static_assert(sizeof...(Args) == 1, + "roctxRangeStopA requires a single argument of type roctx_range_id_t"); + + // push the range data to the process stack + get_range_process_stack().wlock( + [](auto& _range_stack, auto _key, auto& _dst) { + // find the data for the range id if it exists, copy it and delete it + if(auto itr = _range_stack.find(_key); itr != _range_stack.end()) + { + _dst = _range_stack.at(_key); + _range_stack.erase(itr); + } + }, + range_id, + range_data); + } + + auto _ret = exec(info_type::get_pop_table_func(), std::forward(args)...); + + if(range_data.empty()) + { + if constexpr(!std::is_void::value) + return 
_ret; + else + return; + } + + auto& external_corr_ids = range_data.external_correlation_ids; + auto& buffer_record = range_data.buffer_record; + auto& callback_data = range_data.callback_data; + auto*& corr_id = range_data.corr_id; + + ROCP_FATAL_IF(!corr_id) << fmt::format("No correlation id found for range pop operation :: {}", + info_type::name); + + auto thr_id = range_data.thread_id; + auto internal_corr_id = corr_id->internal; + auto ancestor_corr_id = corr_id->ancestor; + + // record the end timestamp as close to the function call as possible + if(!range_data.buffered_contexts.empty()) + { + buffer_record.end_timestamp = common::timestamp_ns(); + } + + if(!range_data.callback_contexts.empty()) + { + tracing::execute_phase_exit_callbacks(range_data.callback_contexts, + external_corr_ids, + info_type::callback_domain_idx, + info_type::operation_idx, + callback_data); + } + + if(!range_data.buffered_contexts.empty()) + { + tracing::execute_buffer_record_emplace(range_data.buffered_contexts, + thr_id, + internal_corr_id, + external_corr_ids, + ancestor_corr_id, + info_type::buffered_domain_idx, + info_type::operation_idx, + buffer_record); + } + + // decrement the reference count after usage in the callback/buffers + corr_id->sub_ref_count(); + + context::pop_latest_correlation_id(corr_id); + + if constexpr(!std::is_void::value) return _ret; +} +} // namespace marker +} // namespace rocprofiler + +#define ROCPROFILER_LIB_ROCPROFILER_SDK_MARKER_RANGE_MARKER_CPP_IMPL 1 + +// template specializations +#include "range_marker.def.cpp" + +namespace rocprofiler +{ +namespace marker +{ +namespace +{ +template +const char* +name_by_id(const uint32_t id, std::index_sequence) +{ + if(OpIdx == id) return roctx_api_info::name; + + if constexpr(sizeof...(OpIdxTail) > 0) + return name_by_id(id, std::index_sequence{}); + else + return nullptr; +} + +template +uint32_t +id_by_name(const char* name, std::index_sequence) +{ + if(std::string_view{roctx_api_info::name} == 
std::string_view{name}) + return roctx_api_info::operation_idx; + + if constexpr(sizeof...(OpIdxTail) > 0) + return id_by_name(name, std::index_sequence{}); + else + return roctx_domain_info::none; +} + +template +void +get_ids(std::vector& _id_list, std::index_sequence) +{ + auto _idx = roctx_api_info::operation_idx; + if(_idx < roctx_domain_info::last) _id_list.emplace_back(_idx); + + if constexpr(sizeof...(OpIdxTail) > 0) + get_ids(_id_list, std::index_sequence{}); +} + +template +void +get_names(std::vector& _name_list, std::index_sequence) +{ + auto&& _name = roctx_api_info::name; + if(_name != nullptr && strnlen(_name, 1) > 0) _name_list.emplace_back(_name); + + if constexpr(sizeof...(OpIdxTail) > 0) + get_names(_name_list, std::index_sequence{}); +} + +template +void +iterate_args(const uint32_t id, + const rocprofiler_callback_tracing_marker_api_data_t& data, + rocprofiler_callback_tracing_operation_args_cb_t func, + int32_t max_deref, + void* user_data, + std::index_sequence) +{ + if(OpIdx == id) + { + using info_type = roctx_api_info; + auto&& arg_list = info_type::as_arg_list(data, max_deref); + auto&& arg_addr = info_type::as_arg_addr(data); + for(size_t i = 0; i < std::min(arg_list.size(), arg_addr.size()); ++i) + { + auto ret = func(info_type::callback_domain_idx, // kind + id, // operation + i, // arg_number + arg_addr.at(i), // arg_value_addr + arg_list.at(i).indirection_level, // indirection + arg_list.at(i).type, // arg_type + arg_list.at(i).name, // arg_name + arg_list.at(i).value.c_str(), // arg_value_str + arg_list.at(i).dereference_count, // num deref in str + user_data); + if(ret != 0) break; + } + return; + } + if constexpr(sizeof...(OpIdxTail) > 0) + iterate_args( + id, data, func, max_deref, user_data, std::index_sequence{}); +} +} // namespace + +// check out the assembly here... 
this compiles to a switch statement +template +const char* +name_by_id(uint32_t id) +{ + return name_by_id(id, std::make_index_sequence::last>{}); +} + +template +uint32_t +id_by_name(const char* name) +{ + return id_by_name(name, + std::make_index_sequence::last>{}); +} + +template +std::vector +get_ids() +{ + constexpr auto last_api_id = roctx_domain_info::last; + auto _data = std::vector{}; + _data.reserve(last_api_id); + get_ids(_data, std::make_index_sequence{}); + return _data; +} + +template +std::vector +get_names() +{ + constexpr auto last_api_id = roctx_domain_info::last; + auto _data = std::vector{}; + _data.reserve(last_api_id); + get_names(_data, std::make_index_sequence{}); + return _data; +} + +template +void +iterate_args(uint32_t id, + const rocprofiler_callback_tracing_marker_api_data_t& data, + rocprofiler_callback_tracing_operation_args_cb_t callback, + int32_t max_deref, + void* user_data) +{ + if(callback) + iterate_args(id, + data, + callback, + max_deref, + user_data, + std::make_index_sequence::last>{}); +} + +namespace range +{ +namespace +{ +bool +should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain, + rocprofiler_buffer_tracing_kind_t _buffered_domain, + int _operation) +{ + // we loop over all the *registered* contexts and see if any of them, at any point in time, + // might require callback or buffered API tracing + for(const auto& itr : context::get_registered_contexts()) + { + if(!itr) continue; + + // if there is a callback tracer enabled for the given domain and op, we need to wrap + if(itr->callback_tracer && itr->callback_tracer->domains(_callback_domain) && + itr->callback_tracer->domains(_callback_domain, _operation)) + return true; + + // if there is a buffered tracer enabled for the given domain and op, we need to wrap + if(itr->buffered_tracer && itr->buffered_tracer->domains(_buffered_domain) && + itr->buffered_tracer->domains(_buffered_domain, _operation)) + return true; + } + return false; +} + 
+template +void +copy_table(Tp* _orig, uint64_t _tbl_instance, std::integral_constant) +{ + using table_type = typename roctx_table_lookup::type; + + if constexpr(std::is_same::value) + { + auto _info = roctx_api_info{}; + + if constexpr(_info.is_range) + { + // make sure we don't access a field that doesn't exist in input table + // NOLINTNEXTLINE(misc-redundant-expression) + if(_info.push_offset() >= _orig->size || _info.pop_offset() >= _orig->size) return; + + // 1. get the sub-table containing the function pointer in original table + // 2. get reference to function pointer in sub-table in original table + auto& _orig_table = _info.get_table(_orig); + auto& _orig_push_func = _info.get_push_table_func(_orig_table); + auto& _orig_pop_func = _info.get_pop_table_func(_orig_table); + // 3. get the sub-table containing the function pointer in saved table + // 4. get reference to function pointer in sub-table in saved table + // 5. save the original function in the saved table + auto& _copy_table = _info.get_table(*get_table()); + auto& _push_copy_func = _info.get_push_table_func(_copy_table); + auto& _pop_copy_func = _info.get_pop_table_func(_copy_table); + + ROCP_FATAL_IF(_push_copy_func && _tbl_instance == 0) + << _info.name << " has non-null function pointer " << _push_copy_func + << " despite this being the first instance of the library being copies"; + + ROCP_FATAL_IF(_pop_copy_func && _tbl_instance == 0) + << _info.name << " has non-null function pointer " << _pop_copy_func + << " despite this being the first instance of the library being copies"; + + if(!_push_copy_func || !_pop_copy_func) + { + ROCP_TRACE << "copying table entry for " << _info.name; + _push_copy_func = _orig_push_func; + _pop_copy_func = _orig_pop_func; + } + else + { + ROCP_TRACE << "skipping copying table entry for " << _info.name + << " from table instance " << _tbl_instance; + } + } + else + { + // make sure we don't access a field that doesn't exist in input table + if(_info.offset() >= 
_orig->size) return; + + // 1. get the sub-table containing the function pointer in original table + // 2. get reference to function pointer in sub-table in original table + auto& _orig_table = _info.get_table(_orig); + auto& _orig_func = _info.get_table_func(_orig_table); + // 3. get the sub-table containing the function pointer in saved table + // 4. get reference to function pointer in sub-table in saved table + // 5. save the original function in the saved table + auto& _copy_table = _info.get_table(*get_table()); + auto& _copy_func = _info.get_table_func(_copy_table); + + ROCP_FATAL_IF(_copy_func && _tbl_instance == 0) + << _info.name << " has non-null function pointer " << _copy_func + << " despite this being the first instance of the library being copies"; + + if(!_copy_func) + { + ROCP_TRACE << "copying table entry for " << _info.name; + _copy_func = _orig_func; + } + else + { + ROCP_TRACE << "skipping copying table entry for " << _info.name + << " from table instance " << _tbl_instance; + } + } + } +} + +template +void +update_table(Tp* _orig, std::integral_constant) +{ + using table_type = typename roctx_table_lookup::type; + + if constexpr(std::is_same::value) + { + auto _info = roctx_api_info{}; + + if constexpr(_info.is_range) + { + // make sure we don't access a field that doesn't exist in input table + // NOLINTNEXTLINE(misc-redundant-expression) + if(_info.push_offset() >= _orig->size || _info.pop_offset() >= _orig->size) return; + + // check to see if there are any contexts which enable this operation in the ROCTX API + // domain + if(!should_wrap_functor( + _info.callback_domain_idx, _info.buffered_domain_idx, _info.operation_idx)) + return; + + ROCP_TRACE << "updating table entry for " << _info.name; + + // 1. get the sub-table containing the function pointer in original table + // 2. get reference to function pointer in sub-table in original table + // 3. 
update function pointer with wrapper + auto& _table = _info.get_table(_orig); + + auto& _push_func = _info.get_push_table_func(_table); + _push_func = _info.get_push_functor(_push_func); + + auto& _pop_func = _info.get_pop_table_func(_table); + _pop_func = _info.get_pop_functor(_pop_func); + } + else + { + // make sure we don't access a field that doesn't exist in input table + if(_info.offset() >= _orig->size) return; + + // check to see if there are any contexts which enable this operation in the ROCTX API + // domain + if(!should_wrap_functor( + _info.callback_domain_idx, _info.buffered_domain_idx, _info.operation_idx)) + return; + + ROCP_TRACE << "updating table entry for " << _info.name; + + // 1. get the sub-table containing the function pointer in original table + // 2. get reference to function pointer in sub-table in original table + // 3. update function pointer with wrapper + auto& _table = _info.get_table(_orig); + auto& _func = _info.get_table_func(_table); + _func = _info.get_functor(_func); + } + } +} + +template +void +copy_table(Tp* _orig, uint64_t _tbl_instance, std::index_sequence) +{ + copy_table(_orig, _tbl_instance, std::integral_constant{}); + if constexpr(sizeof...(OpIdxTail) > 0) + copy_table(_orig, _tbl_instance, std::index_sequence{}); +} + +template +void +update_table(Tp* _orig, std::index_sequence) +{ + update_table(_orig, std::integral_constant{}); + if constexpr(sizeof...(OpIdxTail) > 0) + update_table(_orig, std::index_sequence{}); +} +} // namespace + +template +void +copy_table(TableT* _orig, uint64_t _tbl_instance) +{ + constexpr auto TableIdx = roctx_table_id_lookup::value; + if(_orig) + copy_table( + _orig, _tbl_instance, std::make_index_sequence::last>{}); +} + +template +void +update_table(TableT* _orig, uint64_t _instv) +{ + constexpr auto TableIdx = roctx_table_id_lookup::value; + if(_orig) + { + copy_table(_orig, _instv); + update_table(_orig, + std::make_index_sequence::last>{}); + } +} +} // namespace range + +using 
iterate_args_data_t = rocprofiler_callback_tracing_marker_api_data_t; +using iterate_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t; + +#define INSTANTIATE_MARKER_TABLE_FUNC(TABLE_TYPE, TABLE_IDX) \ + template void range::update_table(TABLE_TYPE * _tbl, uint64_t _instv); \ + template const char* name_by_id(uint32_t); \ + template uint32_t id_by_name(const char*); \ + template std::vector get_ids(); \ + template std::vector get_names(); \ + template void iterate_args( \ + uint32_t, const iterate_args_data_t&, iterate_args_cb_t, int32_t, void*); + +INSTANTIATE_MARKER_TABLE_FUNC(roctx_core_api_table_t, ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange) + +#undef INSTANTIATE_MARKER_TABLE_FUNC +} // namespace marker +} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/marker/range_marker.def.cpp b/source/lib/rocprofiler-sdk/marker/range_marker.def.cpp new file mode 100644 index 0000000000..d8c9c2b60b --- /dev/null +++ b/source/lib/rocprofiler-sdk/marker/range_marker.def.cpp @@ -0,0 +1,73 @@ +// MIT License +// +// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "lib/rocprofiler-sdk/marker/defines.hpp" +#include "lib/rocprofiler-sdk/marker/marker.hpp" + +#include +#include +#include + +namespace rocprofiler +{ +namespace marker +{ +template <> +struct roctx_domain_info +{ + using args_type = rocprofiler_marker_api_args_t; + using retval_type = rocprofiler_marker_api_retval_t; + using callback_data_type = rocprofiler_callback_tracing_marker_api_data_t; + using buffer_data_type = rocprofiler_buffer_tracing_marker_api_record_t; +}; + +template <> +struct roctx_domain_info +: roctx_domain_info +{ + using enum_type = rocprofiler_marker_core_range_api_id_t; + static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API; + static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API; + static constexpr auto none = ROCPROFILER_MARKER_CORE_RANGE_API_ID_NONE; + static constexpr auto last = ROCPROFILER_MARKER_CORE_RANGE_API_ID_LAST; + static constexpr auto external_correlation_id_domain_idx = + ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_CORE_RANGE_API; +}; +} // namespace marker +} // namespace rocprofiler + +#if defined(ROCPROFILER_LIB_ROCPROFILER_SDK_MARKER_RANGE_MARKER_CPP_IMPL) && \ + ROCPROFILER_LIB_ROCPROFILER_SDK_MARKER_RANGE_MARKER_CPP_IMPL == 1 + +// clang-format off +MARKER_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange, roctx_core_api_table_t) + +MARKER_EVENT_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange, ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA, roctxMarkA, roctxMarkA_fn, message) +MARKER_RANGE_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange, ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA, 
roctxThreadRangeA, roctxRangePushA_fn, roctxRangePop_fn, message) +MARKER_RANGE_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange, ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA, roctxProcessRangeA, roctxRangeStartA_fn, roctxRangeStop_fn, message) +MARKER_EVENT_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange, ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId, roctxGetThreadId, roctxGetThreadId_fn, tid) +// clang-format on + +#else +# error \ + "Do not compile this file directly. It is included by lib/rocprofiler-sdk/marker/range_marker.cpp" +#endif diff --git a/source/lib/rocprofiler-sdk/registration.cpp b/source/lib/rocprofiler-sdk/registration.cpp index b62338df33..58f1d2599a 100644 --- a/source/lib/rocprofiler-sdk/registration.cpp +++ b/source/lib/rocprofiler-sdk/registration.cpp @@ -991,6 +991,8 @@ rocprofiler_set_api_table(const char* name, rocprofiler::marker::update_table(roctx_ctrl); rocprofiler::marker::update_table(roctx_name); + rocprofiler::marker::range::update_table(roctx_core, lib_instance); + // Tracing notifications the runtime has initialized rocprofiler::runtime_init::initialize( ROCPROFILER_RUNTIME_INITIALIZATION_MARKER, lib_version, lib_instance); diff --git a/source/lib/rocprofiler-sdk/tests/common.hpp b/source/lib/rocprofiler-sdk/tests/common.hpp index 7eeb7ab8e4..2095eac5de 100644 --- a/source/lib/rocprofiler-sdk/tests/common.hpp +++ b/source/lib/rocprofiler-sdk/tests/common.hpp @@ -189,6 +189,7 @@ get_buffer_tracing_names() ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API, ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API, ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, + ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API, }; auto cb_name_info = buffer_name_info{}; diff --git a/source/lib/rocprofiler-sdk/tests/naming.cpp b/source/lib/rocprofiler-sdk/tests/naming.cpp index 9078dca010..7686de9a5d 100644 --- a/source/lib/rocprofiler-sdk/tests/naming.cpp +++ b/source/lib/rocprofiler-sdk/tests/naming.cpp @@ -100,6 +100,9 
@@ TEST(rocprofiler_lib, api_id_names) ROCPROFILER_MARKER_CONTROL_API_ID_LAST); EXPECT_EQ(buffered_names.operation_names.at(ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API).size(), ROCPROFILER_MARKER_NAME_API_ID_LAST); + EXPECT_EQ( + buffered_names.operation_names.at(ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API).size(), + ROCPROFILER_MARKER_CORE_RANGE_API_ID_LAST); // Code object callback EXPECT_EQ(callback_names.operation_names.at(ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT).size(), diff --git a/source/lib/rocprofiler-sdk/tests/roctx.cpp b/source/lib/rocprofiler-sdk/tests/roctx.cpp index b240a60d57..de6575b8a7 100644 --- a/source/lib/rocprofiler-sdk/tests/roctx.cpp +++ b/source/lib/rocprofiler-sdk/tests/roctx.cpp @@ -226,6 +226,8 @@ tool_tracing_buffered(rocprofiler_context_id_t context, ROCPROFILER_MARKER_CONTROL_API_ID_LAST); EXPECT_EQ(name_map.operation_names.at(ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API).size(), ROCPROFILER_MARKER_NAME_API_ID_LAST); + EXPECT_EQ(name_map.operation_names.at(ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API).size(), + ROCPROFILER_MARKER_CORE_RANGE_API_ID_LAST); auto v_records = std::vector{}; v_records.reserve(num_headers); diff --git a/tests/pytest-packages/tests/rocprofv3.py b/tests/pytest-packages/tests/rocprofv3.py index 600a6bb85b..9ece7ceae3 100644 --- a/tests/pytest-packages/tests/rocprofv3.py +++ b/tests/pytest-packages/tests/rocprofv3.py @@ -136,7 +136,8 @@ def test_otf2_data( def roctx_mark_filter(val): return ( None - if get_kind_name(val.kind) == "MARKER_CORE_API" + if get_kind_name(val.kind) + in ["MARKER_CORE_API", "MARKER_CORE_RANGE_API"] and get_operation_name(val.kind, val.operation) == "roctxMarkA" else val ) @@ -184,7 +185,12 @@ def test_rocpd_data( ), "marker": ( "marker_api", - ("MARKER_CORE_API", "MARKER_CONTROL_API", "MARKER_NAME_API"), + ( + "MARKER_CORE_API", + "MARKER_CONTROL_API", + "MARKER_NAME_API", + "MARKER_CORE_RANGE_API", + ), ), "kernel": ("kernel_dispatch", ("KERNEL_DISPATCH")), "memory_copy": 
("memory_copy", ("MEMORY_COPY")), diff --git a/tests/python-bindings/validate.py b/tests/python-bindings/validate.py index 68dc6284c1..a71e81ee65 100644 --- a/tests/python-bindings/validate.py +++ b/tests/python-bindings/validate.py @@ -93,6 +93,7 @@ def test_marker_api_trace(marker_input_data): "MARKER_CORE_API", "MARKER_CONTROL_API", "MARKER_NAME_API", + "MARKER_CORE_RANGE_API", ] assert int(row["Process_Id"]) > 0 assert int(row["Thread_Id"]) == 0 or int(row["Thread_Id"]) >= int( @@ -111,7 +112,12 @@ def test_marker_api_trace_json(json_data): def get_kind_name(kind_id): return data.strings.buffer_records[kind_id]["kind"] - valid_domain = ("MARKER_CORE_API", "MARKER_CONTROL_API", "MARKER_NAME_API") + valid_domain = ( + "MARKER_CORE_API", + "MARKER_CONTROL_API", + "MARKER_NAME_API", + "MARKER_CORE_RANGE_API", + ) marker_data = data.buffer_records.marker_api diff --git a/tests/rocprofv3/python-bindings/validate.py b/tests/rocprofv3/python-bindings/validate.py index 8430c38e1f..648d879e98 100644 --- a/tests/rocprofv3/python-bindings/validate.py +++ b/tests/rocprofv3/python-bindings/validate.py @@ -69,6 +69,7 @@ def test_marker_api_trace(marker_input_data): for row in marker_input_data: assert row["Domain"] in [ "MARKER_CORE_API", + "MARKER_CORE_RANGE_API", "MARKER_CONTROL_API", "MARKER_NAME_API", ] @@ -89,7 +90,12 @@ def test_marker_api_trace_json(json_data): def get_kind_name(kind_id): return data.strings.buffer_records[kind_id]["kind"] - valid_domain = ("MARKER_CORE_API", "MARKER_CONTROL_API", "MARKER_NAME_API") + valid_domain = ( + "MARKER_CORE_API", + "MARKER_CONTROL_API", + "MARKER_NAME_API", + "MARKER_CORE_RANGE_API", + ) marker_data = data.buffer_records.marker_api diff --git a/tests/rocprofv3/roctracer-roctx/validate.py b/tests/rocprofv3/roctracer-roctx/validate.py index 4eb5e8de97..0245d802e4 100644 --- a/tests/rocprofv3/roctracer-roctx/validate.py +++ b/tests/rocprofv3/roctracer-roctx/validate.py @@ -38,7 +38,12 @@ def test_marker_api_trace(json_data): 
return itr.value return None - valid_domain = ("MARKER_CORE_API", "MARKER_CONTROL_API", "MARKER_NAME_API") + valid_domain = ( + "MARKER_CORE_API", + "MARKER_CONTROL_API", + "MARKER_NAME_API", + "MARKER_CORE_RANGE_API", + ) buffer_records = data["buffer_records"] marker_data = buffer_records["marker_api"] diff --git a/tests/rocprofv3/tracing-hip-in-libraries/validate.py b/tests/rocprofv3/tracing-hip-in-libraries/validate.py index eeb8305e15..91dc0a423e 100644 --- a/tests/rocprofv3/tracing-hip-in-libraries/validate.py +++ b/tests/rocprofv3/tracing-hip-in-libraries/validate.py @@ -102,15 +102,15 @@ def test_api_trace( hip_correlation_ids.append(cid) for row in marker_input_data: - assert row["Domain"] in [ - "MARKER_CORE_API", - ] + assert row["Domain"] in ["MARKER_CORE_API", "MARKER_CORE_RANGE_API"] assert int(row["Process_Id"]) > 0 assert int(row["Thread_Id"]) == 0 or int(row["Thread_Id"]) >= int( row["Process_Id"] ) assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"]) + functions.append(row["Function"]) cid = int(row["Correlation_Id"]) + # Correlation ID will be identical for MARKER_CORE_API and MARKER_CORE_RANGE_API marker_correlation_ids.append(cid) def get_sorted_unique(inp): @@ -218,6 +218,7 @@ def test_api_trace_json(json_data): valid_marker_domain = [ "MARKER_CORE_API", + "MARKER_CORE_RANGE_API", ] def get_operation_name(kind_id, op_id): @@ -253,6 +254,7 @@ def test_api_trace_json(json_data): assert metadata["pid"] > 0 assert api["thread_id"] == 0 or api["thread_id"] >= metadata["pid"] assert api["end_timestamp"] >= api["start_timestamp"] + functions.append(get_operation_name(api["kind"], api["operation"])) correlation_ids.append(api["correlation_id"]["internal"]) correlation_ids = sorted(list(set(correlation_ids))) diff --git a/tests/rocprofv3/tracing/validate.py b/tests/rocprofv3/tracing/validate.py index 690055c4d2..e22b57454f 100644 --- a/tests/rocprofv3/tracing/validate.py +++ b/tests/rocprofv3/tracing/validate.py @@ -287,6 +287,7 @@ def 
test_marker_api_trace(marker_input_data): "MARKER_CORE_API", "MARKER_CONTROL_API", "MARKER_NAME_API", + "MARKER_CORE_RANGE_API", ] assert int(row["Process_Id"]) > 0 assert int(row["Thread_Id"]) == 0 or int(row["Thread_Id"]) >= int( @@ -305,7 +306,12 @@ def test_marker_api_trace_json(json_data): def get_kind_name(kind_id): return data["strings"]["buffer_records"][kind_id]["kind"] - valid_domain = ("MARKER_CORE_API", "MARKER_CONTROL_API", "MARKER_NAME_API") + valid_domain = ( + "MARKER_CORE_API", + "MARKER_CONTROL_API", + "MARKER_NAME_API", + "MARKER_CORE_RANGE_API", + ) buffer_records = data["buffer_records"] marker_data = buffer_records["marker_api"] diff --git a/tests/tools/json-tool.cpp b/tests/tools/json-tool.cpp index 24d3e344bb..ed5cbf9452 100644 --- a/tests/tools/json-tool.cpp +++ b/tests/tools/json-tool.cpp @@ -771,7 +771,8 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record, } else if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API || record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API || - record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API) + record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API || + record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API) { auto* data = static_cast(record.payload); auto args = callback_arg_array_t{}; @@ -970,7 +971,8 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/, } else if(header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API || header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API || - header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API) + header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API || + header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API) { auto* record = static_cast(header->payload); @@ -1428,6 +1430,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) nullptr), "marker core api tracing service configure"); + // 
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service( + // marker_api_callback_ctx, + // ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API, + // nullptr, + // 0, + // tool_tracing_callback, + // nullptr), + // "marker core api tracing service configure"); + ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service( marker_api_callback_ctx, ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API, @@ -1772,6 +1783,14 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) marker_api_buffered_buffer), "buffer tracing service configure"); + // ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service( + // marker_api_buffered_ctx, + // ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API, + // nullptr, + // 0, + // marker_api_buffered_buffer), + // "buffer tracing service configure"); + ROCPROFILER_CALL( rocprofiler_configure_buffer_tracing_service(marker_api_buffered_ctx, ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API,