[SWDEV-516561][1/2] Add MARKER_RANGE_EXTENT to capture ROCTX ranges (#363)

* [SWDEV-516561][1/2] Add MARKER_RANGE_EXTENT to capture ROCTX ranges

Range extent to capture all work between roctxpush/pop operations. The entry callback takes place during roctxpush and the exit callback takes place in roctxpop. This is primarily to allow us to keep an ancestor id on the ancestor stack such that all operations that take place within the push/pop context can be annotated as being a part of this range. With the current setup (where push and pop are two separate operations that need to be combined externally), we cannot keep an ancestor id on the stack and thus cannot tie tracing events to particular ranges.

Correlation id information is inherited from the push operation. Ancestor id needs to be added in a future commit that also outputs this ancestor to CSV.

Output:

```
[ctest] {'size': 64, 'kind': 7, 'operation': 1, 'correlation_id': {'internal': 1525, 'external': 0, 'ancestor': 1524}, 'start_timestamp': 2932551479402642, 'end_timestamp': 2932551491178449, 'thread_id': 3254861}
[ctest] {'size': 64, 'kind': 8, 'operation': 2, 'correlation_id': {'internal': 1525, 'external': 0, 'ancestor': 1524}, 'start_timestamp': 2932551479405878, 'end_timestamp': 2932551491181214, 'thread_id': 3254861}
```

Note: Kind 8 = range extent op.

* Merge fix

Revert several changes

source/lib/rocprofiler-sdk/marker/range_marker.*

- separate out range marker implementation from the standard marker implementation

Update public API with marker core range

Support marker core range in sdk (source/lib/rocprofiler-sdk)

Transition rocprofiler-sdk-tool and output lib to use marker core range

Misc fixes for tests

Fix logic in lib/output/generate{CSV,Stats}.cpp

Update tests/rocprofv3/tracing-hip-in-libraries (marker validation)

Fix test_otf2_data

* Test fixes

---------

Co-authored-by: Benjamin Welton <bewelton@amd.com>
This commit is contained in:
Welton, Benjamin
2025-07-08 23:41:22 -07:00
کامیت شده توسط GitHub
والد 2fa98c4d14
کامیت 2c4e20b951
35فایلهای تغییر یافته به همراه1391 افزوده شده و 129 حذف شده
@@ -1020,6 +1020,16 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_NAME_API_ID_roctxNameHipDevice);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_NAME_API_ID_roctxNameHipStream);
static_assert(ROCPROFILER_MARKER_NAME_API_ID_LAST == 4);
// rocprofiler_marker_core_range_api_id_t
// Registers the enum with ROCPROFILER_ENUM_INFO and adds one label per operation id.
// NOTE(review): the meaning of the `0` and `false` arguments is defined by the
// ROCPROFILER_ENUM_INFO macro, which is not visible here -- confirm before changing them.
ROCPROFILER_ENUM_INFO(rocprofiler_marker_core_range_api_id_t,
0,
ROCPROFILER_MARKER_CORE_RANGE_API_ID_LAST,
false)
ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId);
// Four labels above, four operations expected: this fails to compile when the enum grows
// or shrinks, presumably so that the label list is revisited at the same time.
static_assert(ROCPROFILER_MARKER_CORE_RANGE_API_ID_LAST == 4);
// rocprofiler_ompt_operation_t
ROCPROFILER_ENUM_INFO(rocprofiler_ompt_operation_t, 0, ROCPROFILER_OMPT_ID_LAST, false);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_OMPT_ID_thread_begin);
@@ -1307,7 +1317,8 @@ static_assert(ROCPROFILER_HIP_TABLE_ID_LAST == 2);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_TABLE_ID_RoctxCore);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_TABLE_ID_RoctxControl);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_TABLE_ID_RoctxName);
static_assert(ROCPROFILER_MARKER_TABLE_ID_LAST == 3);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange);
static_assert(ROCPROFILER_MARKER_TABLE_ID_LAST == 4);
// rocprofiler_rccl_table_id_t
ROCPROFILER_ENUM_LABEL(ROCPROFILER_RCCL_TABLE_ID);
@@ -1393,6 +1404,7 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_HSA_FINALIZE_EXT_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT);
@@ -1406,7 +1418,7 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_ROCJPEG_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_CALLBACK_TRACING_HIP_STREAM);
static_assert(ROCPROFILER_CALLBACK_TRACING_LAST == 21);
static_assert(ROCPROFILER_CALLBACK_TRACING_LAST == 22);
// rocprofiler_buffer_tracing_kind_t
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_NONE);
@@ -1417,6 +1429,7 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_HSA_FINALIZE_EXT_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY);
@@ -1441,7 +1454,7 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_EVENT_DROPPED_EVENTS);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_PAGE_MIGRATE);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_PAGE_FAULT);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_QUEUE);
static_assert(ROCPROFILER_BUFFER_TRACING_LAST == 32);
static_assert(ROCPROFILER_BUFFER_TRACING_LAST == 33);
// rocprofiler_code_object_operation_t
ROCPROFILER_ENUM_LABEL(ROCPROFILER_CODE_OBJECT_NONE);
@@ -1637,7 +1650,8 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_OMPT);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_ALLOCATION);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCDECODE_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCJPEG_API);
static_assert(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_LAST == 18);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_CORE_RANGE_API);
static_assert(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_LAST == 19);
// rocprofiler_thread_trace_parameter_type_t
ROCPROFILER_ENUM_LABEL(ROCPROFILER_THREAD_TRACE_PARAMETER_TARGET_CU);
@@ -211,6 +211,7 @@ ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(HSA_FINALIZE_EXT_API, hsa_api)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(HIP_RUNTIME_API, hip_api)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(HIP_COMPILER_API, hip_api)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(MARKER_CORE_API, marker_api)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(MARKER_CORE_RANGE_API, marker_api)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(MARKER_CONTROL_API, marker_api)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(MARKER_NAME_API, marker_api)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(MEMORY_COPY, memory_copy)
@@ -244,6 +245,7 @@ ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(HSA_FINALIZE_EXT_API, hsa_api)
ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(HIP_RUNTIME_API, hip_api)
ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(HIP_COMPILER_API, hip_api)
ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(MARKER_CORE_API, marker_api)
ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(MARKER_CORE_RANGE_API, marker_api)
ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(MARKER_CONTROL_API, marker_api)
ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(MARKER_NAME_API, marker_api)
ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(CODE_OBJECT, none)
@@ -54,24 +54,25 @@ ROCPROFILER_EXTERN_C_INIT
// NOLINTNEXTLINE(performance-enum-size)
typedef enum ROCPROFILER_SDK_EXPERIMENTAL rocprofiler_external_correlation_id_request_kind_t
{
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_NONE = 0, ///< Unknown kind
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_CORE_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_AMD_EXT_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_IMAGE_EXT_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_FINALIZE_EXT_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HIP_RUNTIME_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HIP_COMPILER_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_CORE_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_CONTROL_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_NAME_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_SCRATCH_MEMORY, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_RCCL_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_OMPT, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_ALLOCATION, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCDECODE_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCJPEG_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_NONE = 0, ///< Unknown kind
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_CORE_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_AMD_EXT_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_IMAGE_EXT_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HSA_FINALIZE_EXT_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HIP_RUNTIME_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_HIP_COMPILER_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_CORE_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_CONTROL_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_NAME_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_COPY, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_SCRATCH_MEMORY, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_RCCL_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_OMPT, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MEMORY_ALLOCATION, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCDECODE_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_ROCJPEG_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_CORE_RANGE_API, ///<
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_LAST,
} rocprofiler_external_correlation_id_request_kind_t;
@@ -175,6 +175,8 @@ typedef enum rocprofiler_callback_tracing_kind_t // NOLINT(performance-enum-siz
ROCPROFILER_CALLBACK_TRACING_ROCDECODE_API, ///< rocDecode API Tracing
ROCPROFILER_CALLBACK_TRACING_ROCJPEG_API, ///< rocJPEG API Tracing
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM, ///< @see ::rocprofiler_hip_stream_operation_t
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API, ///< @see
///< ::rocprofiler_marker_core_range_api_id_t
ROCPROFILER_CALLBACK_TRACING_LAST,
} rocprofiler_callback_tracing_kind_t;
@@ -224,7 +226,8 @@ typedef enum rocprofiler_buffer_tracing_kind_t // NOLINT(performance-enum-size)
ROCPROFILER_BUFFER_TRACING_KFD_PAGE_MIGRATE, ///< @see rocprofiler_kfd_page_migrate_operation_t
ROCPROFILER_BUFFER_TRACING_KFD_PAGE_FAULT, ///< @see rocprofiler_kfd_page_fault_operation_t
ROCPROFILER_BUFFER_TRACING_KFD_QUEUE, ///< @see rocprofiler_kfd_queue_operation_t
ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API, ///< @see
///< ::rocprofiler_marker_core_range_api_id_t
ROCPROFILER_BUFFER_TRACING_LAST,
/// @var ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT
@@ -101,6 +101,15 @@ typedef union rocprofiler_marker_api_args_t
const char* name;
const struct ihipStream_t* stream;
} roctxNameHipStream;
struct
{
const char* message;
} roctxThreadRangeA;
struct
{
const char* message;
// roctx_range_id_t id; // only set when range ends in callback tracing
} roctxProcessRangeA;
} rocprofiler_marker_api_args_t;
ROCPROFILER_EXTERN_C_FINI
@@ -54,3 +54,13 @@ typedef enum rocprofiler_marker_name_api_id_t // NOLINT(performance-enum-size)
ROCPROFILER_MARKER_NAME_API_ID_roctxNameHipStream,
ROCPROFILER_MARKER_NAME_API_ID_LAST,
} rocprofiler_marker_name_api_id_t;
/**
 * @brief Operation IDs for the marker "core range" tracing kinds.
 *
 * These are the `operation` values reported for
 * ::ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API and
 * ::ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API.
 */
typedef enum rocprofiler_marker_core_range_api_id_t  // NOLINT(performance-enum-size)
{
// Value placed before the first valid ID.
// NOTE(review): presumably an "invalid / unset" sentinel -- confirm against the other *_API_ID_NONE enumerators.
ROCPROFILER_MARKER_CORE_RANGE_API_ID_NONE = -1,
// roctxMarkA operation; first valid ID (0).
ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA = 0,
// NOTE(review): appears to model a roctxRangePush/roctxRangePop pair as one operation
// (enter phase = push, exit phase = pop) -- confirm against the range marker implementation.
ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA,
// NOTE(review): presumably the roctxRangeStart/roctxRangeStop counterpart of the thread range -- confirm.
ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA,
// roctxGetThreadId operation.
ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId,
// One past the last valid ID, i.e. the number of operations (4).
ROCPROFILER_MARKER_CORE_RANGE_API_ID_LAST,
} rocprofiler_marker_core_range_api_id_t;
@@ -29,5 +29,6 @@ typedef enum rocprofiler_marker_table_id_t
ROCPROFILER_MARKER_TABLE_ID_RoctxCore = 0,
ROCPROFILER_MARKER_TABLE_ID_RoctxControl,
ROCPROFILER_MARKER_TABLE_ID_RoctxName,
ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange,
ROCPROFILER_MARKER_TABLE_ID_LAST,
} rocprofiler_marker_table_id_t;
@@ -537,10 +537,10 @@ generate_csv(const output_config& cf
auto row_ss = std::stringstream{};
auto _name = std::string_view{};
if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API &&
(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA ||
record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA ||
record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA))
if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API &&
(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA ||
record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA ||
record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA))
{
_name = tool_metadata.get_marker_message(record.correlation_id.internal);
}
@@ -578,8 +578,8 @@ write_otf2(const output_config& cfg,
if(!_inp) return;
for(auto itr : *_inp)
{
if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API &&
itr.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA)
if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API &&
itr.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA)
continue;
using value_type = common::mpl::unqualified_type_t<decltype(itr)>;
@@ -589,8 +589,8 @@ write_otf2(const output_config& cfg,
rocprofiler_buffer_tracing_marker_api_record_t>::value)
{
paradigm = OTF2_PARADIGM_USER;
if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API &&
itr.operation != ROCPROFILER_MARKER_CORE_API_ID_roctxGetThreadId)
if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API &&
itr.operation != ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId)
name = tool_metadata.get_marker_message(itr.correlation_id.internal);
}
@@ -392,8 +392,8 @@ write_perfetto(
for(auto itr : marker_api_gen.get(ditr))
{
auto& track = thread_tracks.at(itr.thread_id);
auto name = (itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API &&
itr.operation != ROCPROFILER_MARKER_CORE_API_ID_roctxGetThreadId)
auto name = (itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API &&
itr.operation != ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId)
? tool_metadata.get_marker_message(itr.correlation_id.internal)
: buffer_names.at(itr.kind, itr.operation);
@@ -1249,10 +1249,10 @@ write_rocpd(
auto name = tool_metadata.buffer_names.at(itr.kind, itr.operation);
auto msg = std::string{"{}"};
if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API)
if(itr.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API)
{
if(static_cast<rocprofiler_tracing_operation_t>(itr.operation) !=
ROCPROFILER_MARKER_CORE_API_ID_roctxGetThreadId)
ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId)
{
// check generatePerfetto.cpp and generateOTF2.cpp, and the marker name in
// the view
@@ -147,10 +147,10 @@ generate_stats(const output_config& /*cfg*/,
{
auto _name = std::string_view{};
if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API &&
(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA ||
record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA ||
record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA))
if(record.kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API &&
(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA ||
record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA ||
record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA))
{
_name = tool_metadata.get_marker_message(record.correlation_id.internal);
}
@@ -124,8 +124,8 @@ struct marker_tracing_kind_conversion;
};
MAP_TRACING_KIND_CONVERSION(MARKER_API_CORE,
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API)
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API,
ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API)
MAP_TRACING_KIND_CONVERSION(MARKER_API_CONTROL,
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API)
@@ -567,7 +567,7 @@ kernel_rename_callback(rocprofiler_callback_tracing_record_t record,
{
if(!tool::get_config().kernel_rename || thread_dispatch_rename == nullptr) return;
if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API)
if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API)
{
auto* marker_data =
static_cast<rocprofiler_callback_tracing_marker_api_data_t*>(record.payload);
@@ -576,22 +576,22 @@ kernel_rename_callback(rocprofiler_callback_tracing_record_t record,
return std::string_view{*common::get_string_entry(_hash_v)};
};
if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA &&
if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA &&
record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT && marker_data->args.roctxMarkA.message)
{
thread_dispatch_rename->emplace(tool_metadata->add_kernel_rename_val(
add_message(marker_data->args.roctxMarkA.message), record.correlation_id.internal));
}
else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA &&
record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT &&
marker_data->args.roctxRangePushA.message)
else if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA &&
record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER &&
marker_data->args.roctxThreadRangeA.message)
{
thread_dispatch_rename->emplace(tool_metadata->add_kernel_rename_val(
add_message(marker_data->args.roctxRangePushA.message),
add_message(marker_data->args.roctxThreadRangeA.message),
record.correlation_id.internal));
}
else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePop &&
record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
else if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA &&
record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
{
ROCP_FATAL_IF(thread_dispatch_rename->empty())
<< "roctxRangePop invoked more times than roctxRangePush on thread "
@@ -600,6 +600,12 @@ kernel_rename_callback(rocprofiler_callback_tracing_record_t record,
thread_dispatch_rename->pop();
}
}
else
{
ROCP_CI_LOG(INFO) << fmt::format(
"Unsupported operation for {}",
tool_metadata->get_operation_name(record.kind, record.operation));
}
common::consume_args(user_data, data);
}
@@ -696,12 +702,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
rocprofiler_user_data_t* user_data,
void* data)
{
static thread_local auto stacked_range =
std::vector<rocprofiler_buffer_tracing_marker_api_record_t>{};
static auto global_range = common::Synchronized<
std::unordered_map<roctx_range_id_t, rocprofiler_buffer_tracing_marker_api_record_t>>{};
if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API)
if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API)
{
auto* marker_data =
static_cast<rocprofiler_callback_tracing_marker_api_data_t*>(record.payload);
@@ -709,7 +710,7 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
auto ts = rocprofiler_timestamp_t{};
rocprofiler_get_timestamp(&ts);
if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA)
if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA)
{
if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
{
@@ -728,83 +729,60 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
tool::write_ring_buffer(marker_record, domain_type::MARKER);
}
}
else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA)
else if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA)
{
if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
{
if(marker_data->args.roctxRangePushA.message)
user_data->value = ts;
if(marker_data->args.roctxThreadRangeA.message)
{
CHECK_NOTNULL(tool_metadata)
->add_marker_message(
record.correlation_id.internal,
std::string{marker_data->args.roctxRangePushA.message});
auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{};
marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t);
marker_record.kind = convert_marker_tracing_kind(record.kind);
marker_record.operation = record.operation;
marker_record.thread_id = record.thread_id;
marker_record.correlation_id = record.correlation_id;
marker_record.start_timestamp = ts;
marker_record.end_timestamp = 0;
stacked_range.emplace_back(marker_record);
std::string{marker_data->args.roctxThreadRangeA.message});
}
}
}
else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePop)
{
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
{
ROCP_FATAL_IF(stacked_range.empty())
<< "roctxRangePop invoked more times than roctxRangePush on thread "
<< rocprofiler::common::get_tid();
auto val = stacked_range.back();
stacked_range.pop_back();
val.end_timestamp = ts;
tool::write_ring_buffer(val, domain_type::MARKER);
}
}
else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStartA)
{
if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT &&
marker_data->args.roctxRangeStartA.message)
{
CHECK_NOTNULL(tool_metadata)
->add_marker_message(record.correlation_id.internal,
std::string{marker_data->args.roctxRangeStartA.message});
auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{};
marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t);
marker_record.kind = convert_marker_tracing_kind(record.kind);
marker_record.operation = record.operation;
marker_record.thread_id = record.thread_id;
marker_record.correlation_id = record.correlation_id;
marker_record.start_timestamp = ts;
marker_record.end_timestamp = 0;
marker_record.start_timestamp = user_data->value;
marker_record.end_timestamp = ts;
auto _id = marker_data->retval.roctx_range_id_t_retval;
global_range.wlock(
[](auto& map, roctx_range_id_t _range_id, auto&& _record) {
map.emplace(_range_id, std::move(_record));
},
_id,
marker_record);
tool::write_ring_buffer(marker_record, domain_type::MARKER);
}
}
else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangeStop)
else if(record.operation == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA)
{
if(record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
{
auto _id = marker_data->args.roctxRangeStop.id;
auto&& _entry = global_range.rlock(
[](const auto& map, auto _key) { return map.at(_key); }, _id);
user_data->value = ts;
_entry.end_timestamp = ts;
tool::write_ring_buffer(_entry, domain_type::MARKER);
global_range.wlock([](auto& map, auto _key) { return map.erase(_key); }, _id);
if(marker_data->args.roctxProcessRangeA.message)
{
CHECK_NOTNULL(tool_metadata)
->add_marker_message(
record.correlation_id.internal,
std::string{marker_data->args.roctxProcessRangeA.message});
}
}
else if(record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
{
auto marker_record = rocprofiler_buffer_tracing_marker_api_record_t{};
marker_record.size = sizeof(rocprofiler_buffer_tracing_marker_api_record_t);
marker_record.kind = convert_marker_tracing_kind(record.kind);
marker_record.operation = record.operation;
marker_record.thread_id = record.thread_id;
marker_record.correlation_id = record.correlation_id;
marker_record.start_timestamp = user_data->value;
marker_record.end_timestamp = ts;
tool::write_ring_buffer(marker_record, domain_type::MARKER);
}
}
else
@@ -827,6 +805,12 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
}
}
}
else
{
ROCP_CI_LOG(INFO) << fmt::format(
"Unsupported operation for {}",
tool_metadata->get_operation_name(record.kind, record.operation));
}
(void) data;
}
@@ -1737,7 +1721,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
{
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
get_client_ctx(),
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API,
nullptr,
0,
callbacks.callback_tracing,
@@ -1971,16 +1955,16 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
if(tool::get_config().kernel_rename)
{
auto rename_ctx = rocprofiler_context_id_t{0};
auto marker_core_api_kinds = std::array<rocprofiler_tracing_operation_t, 3>{
ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA,
ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA,
ROCPROFILER_MARKER_CORE_API_ID_roctxRangePop};
auto marker_core_api_kinds = std::array<rocprofiler_tracing_operation_t, 2>{
ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA,
ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA,
};
ROCPROFILER_CALL(rocprofiler_create_context(&rename_ctx), "failed to create context");
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
rename_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API,
marker_core_api_kinds.data(),
marker_core_api_kinds.size(),
callbacks.kernel_rename,
@@ -110,6 +110,7 @@ ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_EVENT_DROPPED_EVENTS)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_PAGE_MIGRATE)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_PAGE_FAULT)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_QUEUE)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(MARKER_CORE_RANGE_API)
template <size_t Idx, size_t... Tail>
std::pair<const char*, size_t>
@@ -334,6 +335,12 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_
val = rocprofiler::hip::stream::name_by_id(operation);
break;
}
case ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API:
{
val = rocprofiler::marker::name_by_id<ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange>(
operation);
break;
}
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE:
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT:
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE:
@@ -490,6 +497,11 @@ rocprofiler_iterate_buffer_tracing_kind_operations(
ops = rocprofiler::hip::stream::get_ids();
break;
}
case ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API:
{
ops = rocprofiler::marker::get_ids<ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange>();
break;
}
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE:
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT:
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE:
@@ -95,6 +95,7 @@ ROCPROFILER_CALLBACK_TRACING_KIND_STRING(RUNTIME_INITIALIZATION)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(ROCDECODE_API)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(ROCJPEG_API)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(HIP_STREAM)
ROCPROFILER_CALLBACK_TRACING_KIND_STRING(MARKER_CORE_RANGE_API)
template <size_t Idx, size_t... Tail>
std::pair<const char*, size_t>
@@ -294,6 +295,12 @@ rocprofiler_query_callback_tracing_kind_operation_name(rocprofiler_callback_trac
val = rocprofiler::hip::stream::name_by_id(operation);
break;
}
case ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API:
{
val = rocprofiler::marker::name_by_id<ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange>(
operation);
break;
}
};
if(!val)
@@ -438,6 +445,11 @@ rocprofiler_iterate_callback_tracing_kind_operations(
ops = rocprofiler::hip::stream::get_ids();
break;
}
case ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API:
{
ops = rocprofiler::marker::get_ids<ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange>();
break;
}
};
for(const auto& itr : ops)
@@ -513,6 +525,7 @@ rocprofiler_iterate_callback_tracing_kind_operation_args(
user_data);
return ROCPROFILER_STATUS_SUCCESS;
}
case ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API:
case ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API:
{
rocprofiler::marker::iterate_args<ROCPROFILER_MARKER_TABLE_ID_RoctxCore>(
@@ -66,6 +66,7 @@ ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KIND_STRING(RCCL_API)
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KIND_STRING(OMPT)
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KIND_STRING(ROCDECODE_API)
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KIND_STRING(ROCJPEG_API)
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KIND_STRING(MARKER_CORE_RANGE_API)
#undef ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KIND_STRING
@@ -1,6 +1,6 @@
#
#
set(ROCPROFILER_LIB_MARKER_SOURCES marker.cpp)
set(ROCPROFILER_LIB_MARKER_SOURCES marker.cpp range_marker.cpp)
set(ROCPROFILER_LIB_MARKER_HEADERS defines.hpp marker.hpp utils.hpp)
target_sources(rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_MARKER_SOURCES}
@@ -32,6 +32,7 @@
template <> \
struct roctx_api_info<MARKER_TABLE, MARKER_API_ID> : roctx_domain_info<MARKER_TABLE> \
{ \
static constexpr auto is_range = false; \
static constexpr auto table_idx = MARKER_TABLE; \
static constexpr auto operation_idx = MARKER_API_ID; \
static constexpr auto name = #MARKER_FUNC; \
@@ -111,6 +112,7 @@
template <> \
struct roctx_api_info<MARKER_TABLE, MARKER_API_ID> : roctx_domain_info<MARKER_TABLE> \
{ \
static constexpr auto is_range = false; \
static constexpr auto table_idx = MARKER_TABLE; \
static constexpr auto operation_idx = MARKER_API_ID; \
static constexpr auto name = #MARKER_FUNC; \
@@ -188,6 +190,199 @@
} \
}
/**
 * Defines the explicit specialization
 * rocprofiler::marker::roctx_api_info<MARKER_TABLE, MARKER_API_ID> for a marker
 * operation that is NOT a range (is_range = false) and that maps to a single
 * function pointer in the ROCTx dispatch table.
 *
 * Parameters:
 *   MARKER_TABLE     table id; used as table_idx and as the roctx_domain_info /
 *                    roctx_table_lookup template argument
 *   MARKER_API_ID    operation id; used as operation_idx
 *   MARKER_NAME      stringized into `name`, and also used as the member selected
 *                    from the args object by get_api_data_args()
 *   MARKER_FUNC_PTR  member of the dispatch table struct; used by offset() and
 *                    get_table_func()
 *   ...              names of the argument fields, forwarded to
 *                    GET_ADDR_MEMBER_FIELDS (as_arg_addr) and
 *                    GET_NAMED_MEMBER_FIELDS (as_arg_list)
 *
 * The generated struct exposes: offset(), get_table(), get_table_func(),
 * get_api_data_args(), get_functor() (returns &base_type::functor<RetT, Args...>),
 * as_arg_addr() and as_arg_list().
 *
 * Unlike the neighbouring definition macros that stringize MARKER_FUNC for `name`,
 * this one takes the reported name (MARKER_NAME) separately from the table member
 * (MARKER_FUNC_PTR).
 *
 * Do not place `//` comments inside the macro body: the trailing backslash would
 * splice the next line into the comment.
 */
#define MARKER_EVENT_API_INFO_DEFINITION_V( \
MARKER_TABLE, MARKER_API_ID, MARKER_NAME, MARKER_FUNC_PTR, ...) \
namespace rocprofiler \
{ \
namespace marker \
{ \
template <> \
struct roctx_api_info<MARKER_TABLE, MARKER_API_ID> : roctx_domain_info<MARKER_TABLE> \
{ \
static constexpr auto is_range = false; \
static constexpr auto table_idx = MARKER_TABLE; \
static constexpr auto operation_idx = MARKER_API_ID; \
static constexpr auto name = #MARKER_NAME; \
\
using domain_type = roctx_domain_info<table_idx>; \
using this_type = roctx_api_info<table_idx, operation_idx>; \
using base_type = roctx_api_impl<table_idx, operation_idx>; \
\
static constexpr auto callback_domain_idx = domain_type::callback_domain_idx; \
static constexpr auto buffered_domain_idx = domain_type::buffered_domain_idx; \
\
using domain_type::args_type; \
using domain_type::retval_type; \
using domain_type::callback_data_type; \
\
static constexpr auto offset() \
{ \
return offsetof(roctx_table_lookup<table_idx>::type, MARKER_FUNC_PTR); \
} \
\
static auto& get_table() { return roctx_table_lookup<table_idx>{}(); } \
\
template <typename TableT> \
static auto& get_table(TableT& _v) \
{ \
return roctx_table_lookup<table_idx>{}(_v); \
} \
\
template <typename TableT> \
static auto& get_table_func(TableT& _table) \
{ \
if constexpr(std::is_pointer<TableT>::value) \
{ \
assert(_table != nullptr && "nullptr to MARKER table for " #MARKER_NAME \
" function"); \
return _table->MARKER_FUNC_PTR; \
} \
else \
{ \
return _table.MARKER_FUNC_PTR; \
} \
} \
\
static auto& get_table_func() { return get_table_func(get_table()); } \
\
template <typename DataT> \
static auto& get_api_data_args(DataT& _data) \
{ \
return _data.MARKER_NAME; \
} \
\
template <typename RetT, typename... Args> \
static auto get_functor(RetT (*)(Args...)) \
{ \
return &base_type::functor<RetT, Args...>; \
} \
\
static std::vector<void*> as_arg_addr(callback_data_type trace_data) \
{ \
return std::vector<void*>{ \
GET_ADDR_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)}; \
} \
\
static auto as_arg_list(callback_data_type trace_data, int32_t max_deref) \
{ \
return utils::stringize( \
max_deref, \
GET_NAMED_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)); \
} \
}; \
} \
}
// Specializes rocprofiler::marker::roctx_api_info<MARKER_TABLE, MARKER_API_ID> for a *range*
// operation: one logical operation that is backed by two entries of the ROCTx table,
// MARKER_PUSH_FUNC_PTR (opens the range) and MARKER_POP_FUNC_PTR (closes it).
//
//   MARKER_TABLE          table id; becomes `table_idx` and selects roctx_table_lookup<>
//   MARKER_API_ID         operation id; becomes `operation_idx`
//   MARKER_NAME           stringized into `name`; also the member that get_api_data_args()
//                         selects from the args data
//   MARKER_PUSH_FUNC_PTR  table member holding the "push"/"start" function pointer
//   MARKER_POP_FUNC_PTR   table member holding the "pop"/"stop" function pointer
//   ...                   field names of the MARKER_NAME args member, forwarded to
//                         GET_ADDR_MEMBER_FIELDS / GET_NAMED_MEMBER_FIELDS
//
// The specialization sets `is_range = true` and exposes push_/pop_ flavours of offset(),
// get_*_table_func() and get_*_functor(); the functors resolve to
// roctx_api_impl<...>::push_functor / pop_functor.
//
// NOTE: the body is a backslash-continued macro, so `//` comments must not be placed inside it.
#define MARKER_RANGE_API_INFO_DEFINITION_V( \
MARKER_TABLE, MARKER_API_ID, MARKER_NAME, MARKER_PUSH_FUNC_PTR, MARKER_POP_FUNC_PTR, ...) \
namespace rocprofiler \
{ \
namespace marker \
{ \
template <> \
struct roctx_api_info<MARKER_TABLE, MARKER_API_ID> : roctx_domain_info<MARKER_TABLE> \
{ \
static constexpr auto is_range = true; \
static constexpr auto table_idx = MARKER_TABLE; \
static constexpr auto operation_idx = MARKER_API_ID; \
static constexpr auto name = #MARKER_NAME; \
\
using domain_type = roctx_domain_info<table_idx>; \
using this_type = roctx_api_info<table_idx, operation_idx>; \
using base_type = roctx_api_impl<table_idx, operation_idx>; \
\
static constexpr auto callback_domain_idx = domain_type::callback_domain_idx; \
static constexpr auto buffered_domain_idx = domain_type::buffered_domain_idx; \
\
using domain_type::args_type; \
using domain_type::retval_type; \
using domain_type::callback_data_type; \
\
static constexpr auto push_offset() \
{ \
return offsetof(roctx_table_lookup<table_idx>::type, MARKER_PUSH_FUNC_PTR); \
} \
\
static constexpr auto pop_offset() \
{ \
return offsetof(roctx_table_lookup<table_idx>::type, MARKER_POP_FUNC_PTR); \
} \
\
static auto& get_table() { return roctx_table_lookup<table_idx>{}(); } \
\
template <typename TableT> \
static auto& get_table(TableT& _v) \
{ \
return roctx_table_lookup<table_idx>{}(_v); \
} \
\
template <typename TableT> \
static auto& get_push_table_func(TableT& _table) \
{ \
if constexpr(std::is_pointer<TableT>::value) \
{ \
assert(_table != nullptr && "nullptr to MARKER table for " #MARKER_NAME \
" function"); \
return _table->MARKER_PUSH_FUNC_PTR; \
} \
else \
{ \
return _table.MARKER_PUSH_FUNC_PTR; \
} \
} \
\
template <typename TableT> \
static auto& get_pop_table_func(TableT& _table) \
{ \
if constexpr(std::is_pointer<TableT>::value) \
{ \
assert(_table != nullptr && "nullptr to MARKER table for " #MARKER_NAME \
" function"); \
return _table->MARKER_POP_FUNC_PTR; \
} \
else \
{ \
return _table.MARKER_POP_FUNC_PTR; \
} \
} \
\
static auto& get_push_table_func() { return get_push_table_func(get_table()); } \
static auto& get_pop_table_func() { return get_pop_table_func(get_table()); } \
\
template <typename DataT> \
static auto& get_api_data_args(DataT& _data) \
{ \
return _data.MARKER_NAME; \
} \
\
template <typename RetT, typename... Args> \
static auto get_push_functor(RetT (*)(Args...)) \
{ \
return &base_type::push_functor<RetT, Args...>; \
} \
\
template <typename RetT, typename... Args> \
static auto get_pop_functor(RetT (*)(Args...)) \
{ \
return &base_type::pop_functor<RetT, Args...>; \
} \
\
static std::vector<void*> as_arg_addr(callback_data_type trace_data) \
{ \
return std::vector<void*>{ \
GET_ADDR_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)}; \
} \
\
static auto as_arg_list(callback_data_type trace_data, int32_t max_deref) \
{ \
return utils::stringize( \
max_deref, \
GET_NAMED_MEMBER_FIELDS(get_api_data_args(trace_data.args), __VA_ARGS__)); \
} \
}; \
} \
}
#define MARKER_API_TABLE_LOOKUP_DEFINITION(TABLE_ID, TYPE) \
namespace rocprofiler \
{ \
@@ -218,3 +413,28 @@
}; \
} \
}
// Defines the table lookup machinery for TABLE_ID / TYPE:
//   - a specialization of the anonymous-namespace get_table<TABLE_ID>() that returns
//     get_table_impl<TYPE>();
//   - a specialization of roctx_table_lookup<TABLE_ID> whose `type` is TYPE and whose call
//     operators yield a reference to the table itself, whether given a reference, a pointer
//     (dereferenced without a null check), or nothing (falls back to get_table<TABLE_ID>()).
// Because get_table<> lives in an anonymous namespace, this macro is meant to be expanded in
// the translation unit that declares it.
#define MARKER_API_TABLE_LOOKUP_DEFINITION_ALT(TABLE_ID, TYPE) \
namespace rocprofiler \
{ \
namespace marker \
{ \
namespace \
{ \
template <> \
auto* get_table<TABLE_ID>() \
{ \
return get_table_impl<TYPE>(); \
} \
} \
\
template <> \
struct roctx_table_lookup<TABLE_ID> \
{ \
using type = TYPE; \
auto& operator()(type& _v) const { return _v; } \
auto& operator()(type* _v) const { return *_v; } \
auto& operator()() const { return (*this)(get_table<TABLE_ID>()); } \
}; \
} \
}
@@ -26,6 +26,7 @@
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/buffer.hpp"
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/context/correlation_id.hpp"
#include "lib/rocprofiler-sdk/marker/utils.hpp"
#include "lib/rocprofiler-sdk/registration.hpp"
#include "lib/rocprofiler-sdk/tracing/tracing.hpp"
@@ -23,6 +23,7 @@
#include "lib/rocprofiler-sdk/marker/defines.hpp"
#include "lib/rocprofiler-sdk/marker/marker.hpp"
#include <rocprofiler-sdk/buffer_tracing.h>
#include <rocprofiler-sdk/external_correlation.h>
#include <rocprofiler-sdk/marker/table_id.h>
@@ -63,6 +63,12 @@ struct roctx_api_impl : roctx_domain_info<TableIdx>
template <typename RetT, typename... Args>
static RetT functor(Args... args);
template <typename RetT, typename... Args>
static RetT push_functor(Args... args);
template <typename RetT, typename... Args>
static RetT pop_functor(Args... args);
};
template <size_t TableIdx>
@@ -96,5 +102,12 @@ copy_table(TableT* _orig, uint64_t _tbl_instance);
template <typename TableT>
void
update_table(TableT* _orig);
namespace range
{
template <typename TableT>
void
update_table(TableT* _orig, uint64_t _tbl_instance);
}
} // namespace marker
} // namespace rocprofiler
@@ -0,0 +1,846 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "lib/common/defines.hpp"
#include "lib/common/static_object.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/buffer.hpp"
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/context/correlation_id.hpp"
#include "lib/rocprofiler-sdk/marker/marker.hpp"
#include "lib/rocprofiler-sdk/marker/utils.hpp"
#include "lib/rocprofiler-sdk/registration.hpp"
#include "lib/rocprofiler-sdk/tracing/tracing.hpp"
#include <rocprofiler-sdk/buffer.h>
#include <rocprofiler-sdk/callback_tracing.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/marker.h>
#include <rocprofiler-sdk-roctx/roctx.h>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include <utility>
namespace rocprofiler
{
namespace marker
{
namespace
{
// Empty placeholder returned by exec() in place of `void`, so that the result of a wrapped call
// can always be stored in a local variable regardless of the wrapped function's return type.
struct null_type
{};
/// Produces the fallback return value used when there is no function to forward to.
///
/// - integral `Tp`: returns a zero-valued `Tp`
/// - empty class `Tp` (e.g. a placeholder type): returns nothing
/// - anything else: rejected at compile time
template <typename Tp>
auto
get_default_retval()
{
    constexpr bool is_integer = std::is_integral_v<Tp>;

    if constexpr(!is_integer)
    {
        static_assert(std::is_empty_v<Tp>, "Error! unsupported return type");
    }
    else
    {
        return Tp{};
    }
}
/// Stores the return value of a wrapped call into the matching member of the retval data.
///
/// The member is selected purely by the static type of `_val`: `int32_t`, `int64_t` or
/// `roctx_range_id_t`. An empty class type (no return value) is accepted and stores nothing;
/// any other type is a compile-time error.
template <typename DataT, typename Tp>
void
set_data_retval(DataT& _data, [[maybe_unused]] Tp _val)
{
    constexpr bool is_i32      = std::is_same_v<int32_t, Tp>;
    constexpr bool is_i64      = std::is_same_v<int64_t, Tp>;
    constexpr bool is_range_id = std::is_same_v<roctx_range_id_t, Tp>;

    if constexpr(is_i32)
    {
        _data.int32_t_retval = _val;
    }
    else if constexpr(is_i64)
    {
        _data.int64_t_retval = _val;
    }
    else if constexpr(is_range_id)
    {
        _data.roctx_range_id_t_retval = _val;
    }
    else
    {
        static_assert(std::is_empty_v<Tp>, "Error! unsupported return type");
    }
}
// Returns the pointer to the saved copy of the table of type Tp. The pointer comes from
// common::static_object<Tp>::construct(), seeded with common::init_public_api_struct(Tp{}),
// and is cached in a function-local static reference so construct() runs only once per Tp.
// NOTE(review): presumably static_object keeps the instance alive for the rest of the
// process -- confirm against lib/common/static_object.hpp.
template <typename Tp>
Tp*
get_table_impl()
{
static auto*& _v = common::static_object<Tp>::construct(common::init_public_api_struct(Tp{}));
return _v;
}
// Primary declaration only: returns a pointer to the saved table for TableIdx. There is no
// generic definition here; each supported table id must provide an explicit specialization.
template <size_t TableIdx>
auto*
get_table();
// State captured when a range is opened and consumed when it is closed. In addition to what it
// inherits from tracing::tracing_data, it carries the callback payload, the buffer record, the
// correlation id and the id of the thread that constructed the object.
struct range_data_t : public tracing::tracing_data
{
using callback_api_data_t = rocprofiler_callback_tracing_marker_api_data_t;
using buffered_api_data_t = rocprofiler_buffer_tracing_marker_api_record_t;
// payload handed to the enter/exit callbacks (args and retval)
callback_api_data_t callback_data = common::init_public_api_struct(callback_api_data_t{});
// record whose start/end timestamps are filled in by the push and pop wrappers
buffered_api_data_t buffer_record = common::init_public_api_struct(buffered_api_data_t{});
// correlation id of the range; stays null until the push wrapper assigns one
context::correlation_id* corr_id = nullptr;
// evaluated when the object is constructed, i.e. on the constructing thread
rocprofiler_thread_id_t thread_id = common::get_tid();
};
auto&
get_range_thread_stack()
{
static thread_local auto push_op_stack = common::container::small_vector<range_data_t, 8>{};
return push_op_stack;
}
auto&
get_range_process_stack()
{
static auto push_op_stack =
common::Synchronized<std::unordered_map<roctx_range_id_t, range_data_t>>{};
return push_op_stack;
}
} // namespace
/// Fills the per-operation argument record from the arguments of the intercepted call.
///
/// With at least one argument the record is aggregate-initialized from `args...` in order;
/// with none, the `no_args.empty` placeholder is set to NUL.
template <size_t TableIdx, size_t OpIdx>
template <typename DataArgsT, typename... Args>
auto
roctx_api_impl<TableIdx, OpIdx>::set_data_args(DataArgsT& _data_args, Args... args)
{
    constexpr bool has_args = (sizeof...(Args) > 0);

    if constexpr(has_args)
    {
        _data_args = DataArgsT{args...};
    }
    else
    {
        _data_args.no_args.empty = '\0';
    }
}
/// Invokes the saved (next) ROCTx function with the given arguments.
///
/// Returns whatever `_func` returns, or a `null_type` placeholder when `_func` returns void.
/// If `_func` is null, an error is logged and a default value is returned instead
/// (`null_type` for void functions, otherwise `get_default_retval<return_type>()`).
template <size_t TableIdx, size_t OpIdx>
template <typename FuncT, typename... Args>
auto
roctx_api_impl<TableIdx, OpIdx>::exec(FuncT&& _func, Args&&... args)
{
    using return_type = std::decay_t<std::invoke_result_t<FuncT, Args...>>;

    constexpr bool returns_void = std::is_void_v<return_type>;

    // nothing to forward to: report it and hand back a neutral value
    if(!_func)
    {
        using info_type = roctx_api_info<TableIdx, OpIdx>;

        ROCP_ERROR << "nullptr to next roctx function for " << info_type::name << " ("
                   << info_type::operation_idx << ")";

        if constexpr(returns_void)
            return null_type{};
        else
            return get_default_retval<return_type>();
    }

    if constexpr(returns_void)
    {
        _func(std::forward<Args>(args)...);
        return null_type{};
    }
    else
    {
        return _func(std::forward<Args>(args)...);
    }
}
// Tracing wrapper for a single (non-range) ROCTx call.
//
// Flow:
//   1. collect the callback/buffered contexts interested in this operation; if there are none,
//      simply forward the call to the saved function and return its result
//   2. create a correlation id and resolve the external correlation ids
//   3. run the "enter" callbacks (with the call arguments recorded in callback_data)
//   4. forward the call, bracketing it with start/end timestamps for buffered tracing
//   5. run the "exit" callbacks (with the return value recorded) and emplace buffer records
//   6. release the correlation id
//
// Returns the value of the forwarded call (nothing when RetT is void).
template <size_t TableIdx, size_t OpIdx>
template <typename RetT, typename... Args>
RetT
roctx_api_impl<TableIdx, OpIdx>::functor(Args... args)
{
using info_type = roctx_api_info<TableIdx, OpIdx>;
using callback_api_data_t = typename roctx_domain_info<TableIdx>::callback_data_type;
using buffered_api_data_t = typename roctx_domain_info<TableIdx>::buffer_data_type;
constexpr auto external_corr_id_domain_idx =
roctx_domain_info<TableIdx>::external_correlation_id_domain_idx;
ROCP_INFO_IF(registration::get_fini_status() != 0) << "Executing " << info_type::name;
auto thr_id = common::get_tid();
auto callback_contexts = tracing::callback_context_data_vec_t{};
auto buffered_contexts = tracing::buffered_context_data_vec_t{};
auto external_corr_ids = tracing::external_correlation_id_map_t{};
// the three containers above are passed as out-params; their emptiness is checked below
tracing::populate_contexts(info_type::callback_domain_idx,
info_type::buffered_domain_idx,
info_type::operation_idx,
callback_contexts,
buffered_contexts,
external_corr_ids);
// fast path: nobody is tracing this operation, so just forward the call
if(callback_contexts.empty() && buffered_contexts.empty())
{
[[maybe_unused]] auto _ret = exec(info_type::get_table_func(), std::forward<Args>(args)...);
if constexpr(!std::is_void<RetT>::value)
return _ret;
else
return;
}
// two references: one is dropped right before the wrapped call, the other after the
// exit callbacks / buffer records have been handled (see the sub_ref_count() calls below)
auto ref_count = 2;
auto buffer_record = common::init_public_api_struct(buffered_api_data_t{});
auto callback_data = common::init_public_api_struct(callback_api_data_t{});
auto* corr_id = tracing::correlation_service::construct(ref_count);
auto internal_corr_id = corr_id->internal;
auto ancestor_corr_id = corr_id->ancestor;
tracing::populate_external_correlation_ids(external_corr_ids,
thr_id,
external_corr_id_domain_idx,
info_type::operation_idx,
internal_corr_id);
// invoke the callbacks
if(!callback_contexts.empty())
{
set_data_args(info_type::get_api_data_args(callback_data.args),
std::forward<Args>(args)...);
tracing::execute_phase_enter_callbacks(callback_contexts,
thr_id,
internal_corr_id,
external_corr_ids,
ancestor_corr_id,
info_type::callback_domain_idx,
info_type::operation_idx,
callback_data);
}
// enter callback may update the external correlation id field
tracing::update_external_correlation_ids(
external_corr_ids, thr_id, external_corr_id_domain_idx);
// record the start timestamp as close to the function call as possible
if(!buffered_contexts.empty())
{
buffer_record.start_timestamp = common::timestamp_ns();
}
// decrement the reference count before invoking
corr_id->sub_ref_count();
auto _ret = exec(info_type::get_table_func(), std::forward<Args>(args)...);
// record the end timestamp as close to the function call as possible
if(!buffered_contexts.empty())
{
buffer_record.end_timestamp = common::timestamp_ns();
}
if(!callback_contexts.empty())
{
// make the return value visible to the exit callbacks
set_data_retval(callback_data.retval, _ret);
tracing::execute_phase_exit_callbacks(callback_contexts,
external_corr_ids,
info_type::callback_domain_idx,
info_type::operation_idx,
callback_data);
}
if(!buffered_contexts.empty())
{
tracing::execute_buffer_record_emplace(buffered_contexts,
thr_id,
internal_corr_id,
external_corr_ids,
ancestor_corr_id,
info_type::buffered_domain_idx,
info_type::operation_idx,
buffer_record);
}
// decrement the reference count after usage in the callback/buffers
corr_id->sub_ref_count();
context::pop_latest_correlation_id(corr_id);
// for void functions _ret is only a placeholder and must not be returned
if constexpr(!std::is_void<RetT>::value) return _ret;
}
/// Tracing wrapper for the call that *opens* a ROCTx range (push / start).
///
/// Flow:
///   1. collect the contexts interested in this range operation into a fresh `range_data_t`
///   2. if none are interested, forward the call untraced
///   3. otherwise create a correlation id, run the "enter" callbacks, stamp the start time and
///      forward the call
///   4. park the `range_data_t` until the matching pop/stop: on the calling thread's stack for
///      thread ranges, or in the process-wide map (keyed by the returned range id) for process
///      ranges
///
/// The correlation id is created with a single reference, which is released by `pop_functor`.
///
/// For thread ranges an (empty) entry is parked even when the push is not traced. Thread
/// ranges are matched purely by nesting order, so skipping the entry would make the pop of an
/// untraced inner range close an enclosing traced range too early.
///
/// Returns the value of the forwarded call (nothing when RetT is void).
template <size_t TableIdx, size_t OpIdx>
template <typename RetT, typename... Args>
RetT
roctx_api_impl<TableIdx, OpIdx>::push_functor(Args... args)
{
    using info_type = roctx_api_info<TableIdx, OpIdx>;

    constexpr auto external_corr_id_domain_idx =
        roctx_domain_info<TableIdx>::external_correlation_id_domain_idx;

    ROCP_INFO_IF(registration::get_fini_status() != 0) << "Executing " << info_type::name;

    auto  thr_id            = common::get_tid();
    auto  range_data        = range_data_t{};
    auto& external_corr_ids = range_data.external_correlation_ids;

    tracing::populate_contexts(info_type::callback_domain_idx,
                               info_type::buffered_domain_idx,
                               info_type::operation_idx,
                               range_data);

    // fast path: nobody is tracing this operation right now
    if(range_data.empty())
    {
        [[maybe_unused]] auto _ret =
            exec(info_type::get_push_table_func(), std::forward<Args>(args)...);

        // keep the per-thread stack balanced with the application's push/pop nesting; the
        // pop wrapper discards empty entries without emitting anything
        if constexpr(OpIdx == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA)
        {
            get_range_thread_stack().emplace_back(std::move(range_data));
        }

        if constexpr(!std::is_void<RetT>::value)
            return _ret;
        else
            return;
    }

    // single reference: it is released when the range is closed
    auto   ref_count     = 1;
    auto&  buffer_record = range_data.buffer_record;
    auto&  callback_data = range_data.callback_data;
    auto*& corr_id       = range_data.corr_id;

    corr_id               = tracing::correlation_service::construct(ref_count);
    auto internal_corr_id = corr_id->internal;
    auto ancestor_corr_id = corr_id->ancestor;

    tracing::populate_external_correlation_ids(external_corr_ids,
                                               thr_id,
                                               external_corr_id_domain_idx,
                                               info_type::operation_idx,
                                               internal_corr_id);

    // invoke the callbacks
    if(!range_data.callback_contexts.empty())
    {
        set_data_args(info_type::get_api_data_args(callback_data.args),
                      std::forward<Args>(args)...);

        tracing::execute_phase_enter_callbacks(range_data.callback_contexts,
                                               thr_id,
                                               internal_corr_id,
                                               external_corr_ids,
                                               ancestor_corr_id,
                                               info_type::callback_domain_idx,
                                               info_type::operation_idx,
                                               callback_data);
    }

    // enter callback may update the external correlation id field
    tracing::update_external_correlation_ids(
        external_corr_ids, thr_id, external_corr_id_domain_idx);

    // record the start timestamp as close to the function call as possible
    if(!range_data.buffered_contexts.empty())
    {
        buffer_record.start_timestamp = common::timestamp_ns();
    }

    auto _ret = exec(info_type::get_push_table_func(), std::forward<Args>(args)...);

    // remember the return value so the exit callbacks (run at pop time) can see it
    if(!range_data.callback_contexts.empty())
    {
        set_data_retval(callback_data.retval, _ret);
    }

    if constexpr(OpIdx == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA)
    {
        // push the range data to the calling thread's stack
        get_range_thread_stack().emplace_back(std::move(range_data));
    }
    else if constexpr(OpIdx == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA)
    {
        // store the range data in the process-wide map under the returned range id
        get_range_process_stack().wlock(
            [](auto& _stack, auto _key, auto&& _range_data) {
                _stack.emplace(_key, std::move(_range_data));
            },
            _ret,
            std::move(range_data));
    }

    // for void functions _ret is only a placeholder and must not be returned
    if constexpr(!std::is_void<RetT>::value) return _ret;
}
/// Tracing wrapper for the call that *closes* a ROCTx range (pop / stop).
///
/// Flow:
///   1. retrieve (and remove) the `range_data_t` parked when the range was opened: the top of
///      the calling thread's stack for thread ranges, or the map entry for the given range id
///      for process ranges
///   2. forward the call
///   3. if the retrieved data is empty (range was not traced, or nothing was parked), return
///   4. otherwise stamp the end time, run the "exit" callbacks, emplace the buffer records and
///      release the correlation id created when the range was opened
///
/// The parked data is moved out of its container rather than copied, since the entry is
/// erased immediately afterwards.
///
/// Returns the value of the forwarded call (nothing when RetT is void).
template <size_t TableIdx, size_t OpIdx>
template <typename RetT, typename... Args>
RetT
roctx_api_impl<TableIdx, OpIdx>::pop_functor(Args... args)
{
    using info_type = roctx_api_info<TableIdx, OpIdx>;

    auto range_data = range_data_t{};

    if constexpr(OpIdx == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA)
    {
        if(auto& _range_stack = get_range_thread_stack(); !_range_stack.empty())
        {
            // take ownership of the innermost open range; an empty stack leaves range_data
            // empty, which is handled below
            range_data = std::move(_range_stack.back());
            _range_stack.pop_back();
        }
    }
    else if constexpr(OpIdx == ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA)
    {
        // checked before the argument pack is indexed so a mismatch gives a readable error
        static_assert(sizeof...(Args) == 1,
                      "roctxRangeStop requires a single argument of type roctx_range_id_t");

        auto range_id = std::get<0>(std::tie(args...));

        // pop the range data from the process-wide map
        get_range_process_stack().wlock(
            [](auto& _range_stack, auto _key, auto& _dst) {
                // find the data for the range id if it exists, take it and delete the entry
                if(auto itr = _range_stack.find(_key); itr != _range_stack.end())
                {
                    _dst = std::move(itr->second);
                    _range_stack.erase(itr);
                }
            },
            range_id,
            range_data);
    }

    auto _ret = exec(info_type::get_pop_table_func(), std::forward<Args>(args)...);

    // nothing was traced for this range: the forwarded call is all there is to do
    if(range_data.empty())
    {
        if constexpr(!std::is_void<RetT>::value)
            return _ret;
        else
            return;
    }

    auto&  external_corr_ids = range_data.external_correlation_ids;
    auto&  buffer_record     = range_data.buffer_record;
    auto&  callback_data     = range_data.callback_data;
    auto*& corr_id           = range_data.corr_id;

    ROCP_FATAL_IF(!corr_id) << fmt::format("No correlation id found for range pop operation :: {}",
                                           info_type::name);

    // report the thread recorded in the range data, not necessarily the calling thread
    auto thr_id           = range_data.thread_id;
    auto internal_corr_id = corr_id->internal;
    auto ancestor_corr_id = corr_id->ancestor;

    // record the end timestamp as close to the function call as possible
    if(!range_data.buffered_contexts.empty())
    {
        buffer_record.end_timestamp = common::timestamp_ns();
    }

    if(!range_data.callback_contexts.empty())
    {
        tracing::execute_phase_exit_callbacks(range_data.callback_contexts,
                                              external_corr_ids,
                                              info_type::callback_domain_idx,
                                              info_type::operation_idx,
                                              callback_data);
    }

    if(!range_data.buffered_contexts.empty())
    {
        tracing::execute_buffer_record_emplace(range_data.buffered_contexts,
                                               thr_id,
                                               internal_corr_id,
                                               external_corr_ids,
                                               ancestor_corr_id,
                                               info_type::buffered_domain_idx,
                                               info_type::operation_idx,
                                               buffer_record);
    }

    // decrement the reference count after usage in the callback/buffers
    corr_id->sub_ref_count();
    context::pop_latest_correlation_id(corr_id);

    // for void functions _ret is only a placeholder and must not be returned
    if constexpr(!std::is_void<RetT>::value) return _ret;
}
} // namespace marker
} // namespace rocprofiler
#define ROCPROFILER_LIB_ROCPROFILER_SDK_MARKER_RANGE_MARKER_CPP_IMPL 1
// template specializations
#include "range_marker.def.cpp"
namespace rocprofiler
{
namespace marker
{
namespace
{
/// Recursive helper: walks the operation ids in the sequence and returns the name of the one
/// equal to `id`, or nullptr when the sequence is exhausted without a match.
template <size_t TableIdx, size_t OpIdx, size_t... OpIdxTail>
const char*
name_by_id(const uint32_t id, std::index_sequence<OpIdx, OpIdxTail...>)
{
    // the head of the sequence is tested first
    if(id == OpIdx) return roctx_api_info<TableIdx, OpIdx>::name;

    if constexpr(sizeof...(OpIdxTail) == 0)
        return nullptr;
    else
        return name_by_id<TableIdx>(id, std::index_sequence<OpIdxTail...>{});
}
template <size_t TableIdx, size_t OpIdx, size_t... OpIdxTail>
uint32_t
id_by_name(const char* name, std::index_sequence<OpIdx, OpIdxTail...>)
{
if(std::string_view{roctx_api_info<TableIdx, OpIdx>::name} == std::string_view{name})
return roctx_api_info<TableIdx, OpIdx>::operation_idx;
if constexpr(sizeof...(OpIdxTail) > 0)
return id_by_name<TableIdx>(name, std::index_sequence<OpIdxTail...>{});
else
return roctx_domain_info<TableIdx>::none;
}
template <size_t TableIdx, size_t OpIdx, size_t... OpIdxTail>
void
get_ids(std::vector<uint32_t>& _id_list, std::index_sequence<OpIdx, OpIdxTail...>)
{
auto _idx = roctx_api_info<TableIdx, OpIdx>::operation_idx;
if(_idx < roctx_domain_info<TableIdx>::last) _id_list.emplace_back(_idx);
if constexpr(sizeof...(OpIdxTail) > 0)
get_ids<TableIdx>(_id_list, std::index_sequence<OpIdxTail...>{});
}
/// Recursive helper: appends, in sequence order, the name of every operation whose name is
/// neither null nor the empty string.
template <size_t TableIdx, size_t OpIdx, size_t... OpIdxTail>
void
get_names(std::vector<const char*>& _name_list, std::index_sequence<OpIdx, OpIdxTail...>)
{
    const char* const _label = roctx_api_info<TableIdx, OpIdx>::name;

    // strnlen(..., 1) only needs to look at the first character to detect ""
    const bool _usable = (_label != nullptr) && (strnlen(_label, 1) > 0);
    if(_usable) _name_list.emplace_back(_label);

    if constexpr(sizeof...(OpIdxTail) > 0)
    {
        get_names<TableIdx>(_name_list, std::index_sequence<OpIdxTail...>{});
    }
}
/// Recursive helper: finds the operation equal to `id` in the sequence and invokes `func` once
/// per argument of that operation, in argument order.
///
/// Iteration stops early as soon as `func` returns non-zero. Nothing is invoked when `id`
/// matches no operation in the sequence.
template <size_t TableIdx, size_t OpIdx, size_t... OpIdxTail>
void
iterate_args(const uint32_t                                        id,
             const rocprofiler_callback_tracing_marker_api_data_t& data,
             rocprofiler_callback_tracing_operation_args_cb_t      func,
             int32_t                                               max_deref,
             void*                                                 user_data,
             std::index_sequence<OpIdx, OpIdxTail...>)
{
    // not this operation: keep searching the remainder of the sequence (if any)
    if(OpIdx != id)
    {
        if constexpr(sizeof...(OpIdxTail) > 0)
        {
            iterate_args<TableIdx>(
                id, data, func, max_deref, user_data, std::index_sequence<OpIdxTail...>{});
        }
        return;
    }

    using info_type = roctx_api_info<TableIdx, OpIdx>;

    auto&& arg_list = info_type::as_arg_list(data, max_deref);
    auto&& arg_addr = info_type::as_arg_addr(data);

    // only visit entries that have both a description and an address
    const size_t _num_args = std::min(arg_list.size(), arg_addr.size());

    for(size_t i = 0; i < _num_args; ++i)
    {
        const auto& _arg = arg_list.at(i);

        const auto _status = func(info_type::callback_domain_idx,  // kind
                                  id,                              // operation
                                  i,                               // arg_number
                                  arg_addr.at(i),                  // arg_value_addr
                                  _arg.indirection_level,          // indirection
                                  _arg.type,                       // arg_type
                                  _arg.name,                       // arg_name
                                  _arg.value.c_str(),              // arg_value_str
                                  _arg.dereference_count,          // num deref in str
                                  user_data);

        if(_status != 0) break;
    }
}
} // namespace
// check out the assembly here... this compiles to a switch statement
template <size_t TableIdx>
const char*
name_by_id(uint32_t id)
{
return name_by_id<TableIdx>(id, std::make_index_sequence<roctx_domain_info<TableIdx>::last>{});
}
template <size_t TableIdx>
uint32_t
id_by_name(const char* name)
{
return id_by_name<TableIdx>(name,
std::make_index_sequence<roctx_domain_info<TableIdx>::last>{});
}
template <size_t TableIdx>
std::vector<uint32_t>
get_ids()
{
constexpr auto last_api_id = roctx_domain_info<TableIdx>::last;
auto _data = std::vector<uint32_t>{};
_data.reserve(last_api_id);
get_ids<TableIdx>(_data, std::make_index_sequence<last_api_id>{});
return _data;
}
template <size_t TableIdx>
std::vector<const char*>
get_names()
{
constexpr auto last_api_id = roctx_domain_info<TableIdx>::last;
auto _data = std::vector<const char*>{};
_data.reserve(last_api_id);
get_names<TableIdx>(_data, std::make_index_sequence<last_api_id>{});
return _data;
}
template <size_t TableIdx>
void
iterate_args(uint32_t id,
const rocprofiler_callback_tracing_marker_api_data_t& data,
rocprofiler_callback_tracing_operation_args_cb_t callback,
int32_t max_deref,
void* user_data)
{
if(callback)
iterate_args<TableIdx>(id,
data,
callback,
max_deref,
user_data,
std::make_index_sequence<roctx_domain_info<TableIdx>::last>{});
}
namespace range
{
namespace
{
/// Decides whether the table entry for `_operation` needs to be replaced by a tracing wrapper.
///
/// Every *registered* context is inspected (not just the active ones), because any of them
/// might require callback or buffered tracing of this operation at some later point in time.
/// Returns true as soon as one context has a callback tracer or a buffered tracer enabled for
/// both the domain and the operation.
bool
should_wrap_functor(rocprofiler_callback_tracing_kind_t _callback_domain,
                    rocprofiler_buffer_tracing_kind_t   _buffered_domain,
                    int                                 _operation)
{
    for(const auto& _ctx : context::get_registered_contexts())
    {
        if(!_ctx) continue;

        // callback tracing requested for this domain + operation?
        const bool _wants_callback = _ctx->callback_tracer &&
                                     _ctx->callback_tracer->domains(_callback_domain) &&
                                     _ctx->callback_tracer->domains(_callback_domain, _operation);
        if(_wants_callback) return true;

        // buffered tracing requested for this domain + operation?
        const bool _wants_buffered = _ctx->buffered_tracer &&
                                     _ctx->buffered_tracer->domains(_buffered_domain) &&
                                     _ctx->buffered_tracer->domains(_buffered_domain, _operation);
        if(_wants_buffered) return true;
    }

    return false;
}
/// Saves the original function pointer(s) of operation `OpIdx` from the incoming table `_orig`
/// into the internally stored copy of the table, so the tracing wrappers can forward to them.
///
/// - Does nothing when `Tp` is not the table type associated with `TableIdx`.
/// - Does nothing when `_orig->size` shows the incoming table is too old to contain the
///   entry (for range operations: either of the push/pop entries).
/// - An entry that is already saved is not overwritten. Finding one already saved while
///   `_tbl_instance == 0` (first instance of the library) is treated as fatal.
///
/// The fatal diagnostics print the offending pointer as an address: streaming a function
/// pointer directly would convert it to `bool` and print `1`.
template <size_t TableIdx, typename Tp, size_t OpIdx>
void
copy_table(Tp* _orig, uint64_t _tbl_instance, std::integral_constant<size_t, OpIdx>)
{
    using table_type = typename roctx_table_lookup<TableIdx>::type;

    if constexpr(std::is_same<table_type, Tp>::value)
    {
        auto _info = roctx_api_info<TableIdx, OpIdx>{};

        if constexpr(_info.is_range)
        {
            // make sure we don't access a field that doesn't exist in input table
            // NOLINTNEXTLINE(misc-redundant-expression)
            if(_info.push_offset() >= _orig->size || _info.pop_offset() >= _orig->size) return;

            // 1. get the sub-table containing the function pointer in original table
            // 2. get reference to function pointer in sub-table in original table
            auto& _orig_table     = _info.get_table(_orig);
            auto& _orig_push_func = _info.get_push_table_func(_orig_table);
            auto& _orig_pop_func  = _info.get_pop_table_func(_orig_table);

            // 3. get the sub-table containing the function pointer in saved table
            // 4. get reference to function pointer in sub-table in saved table
            // 5. save the original function in the saved table
            auto& _copy_table     = _info.get_table(*get_table<TableIdx>());
            auto& _push_copy_func = _info.get_push_table_func(_copy_table);
            auto& _pop_copy_func  = _info.get_pop_table_func(_copy_table);

            ROCP_FATAL_IF(_push_copy_func && _tbl_instance == 0)
                << _info.name << " has non-null function pointer "
                << reinterpret_cast<void*>(_push_copy_func)
                << " despite this being the first instance of the library being copied";

            ROCP_FATAL_IF(_pop_copy_func && _tbl_instance == 0)
                << _info.name << " has non-null function pointer "
                << reinterpret_cast<void*>(_pop_copy_func)
                << " despite this being the first instance of the library being copied";

            // push and pop are saved as a pair: if either is missing, both are (re)copied
            if(!_push_copy_func || !_pop_copy_func)
            {
                ROCP_TRACE << "copying table entry for " << _info.name;
                _push_copy_func = _orig_push_func;
                _pop_copy_func  = _orig_pop_func;
            }
            else
            {
                ROCP_TRACE << "skipping copying table entry for " << _info.name
                           << " from table instance " << _tbl_instance;
            }
        }
        else
        {
            // make sure we don't access a field that doesn't exist in input table
            if(_info.offset() >= _orig->size) return;

            // 1. get the sub-table containing the function pointer in original table
            // 2. get reference to function pointer in sub-table in original table
            auto& _orig_table = _info.get_table(_orig);
            auto& _orig_func  = _info.get_table_func(_orig_table);

            // 3. get the sub-table containing the function pointer in saved table
            // 4. get reference to function pointer in sub-table in saved table
            // 5. save the original function in the saved table
            auto& _copy_table = _info.get_table(*get_table<TableIdx>());
            auto& _copy_func  = _info.get_table_func(_copy_table);

            ROCP_FATAL_IF(_copy_func && _tbl_instance == 0)
                << _info.name << " has non-null function pointer "
                << reinterpret_cast<void*>(_copy_func)
                << " despite this being the first instance of the library being copied";

            if(!_copy_func)
            {
                ROCP_TRACE << "copying table entry for " << _info.name;
                _copy_func = _orig_func;
            }
            else
            {
                ROCP_TRACE << "skipping copying table entry for " << _info.name
                           << " from table instance " << _tbl_instance;
            }
        }
    }
}
template <size_t TableIdx, typename Tp, size_t OpIdx>
void
update_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
{
using table_type = typename roctx_table_lookup<TableIdx>::type;
if constexpr(std::is_same<table_type, Tp>::value)
{
auto _info = roctx_api_info<TableIdx, OpIdx>{};
if constexpr(_info.is_range)
{
// make sure we don't access a field that doesn't exist in input table
// NOLINTNEXTLINE(misc-redundant-expression)
if(_info.push_offset() >= _orig->size || _info.pop_offset() >= _orig->size) return;
// check to see if there are any contexts which enable this operation in the ROCTX API
// domain
if(!should_wrap_functor(
_info.callback_domain_idx, _info.buffered_domain_idx, _info.operation_idx))
return;
ROCP_TRACE << "updating table entry for " << _info.name;
// 1. get the sub-table containing the function pointer in original table
// 2. get reference to function pointer in sub-table in original table
// 3. update function pointer with wrapper
auto& _table = _info.get_table(_orig);
auto& _push_func = _info.get_push_table_func(_table);
_push_func = _info.get_push_functor(_push_func);
auto& _pop_func = _info.get_pop_table_func(_table);
_pop_func = _info.get_pop_functor(_pop_func);
}
else
{
// make sure we don't access a field that doesn't exist in input table
if(_info.offset() >= _orig->size) return;
// check to see if there are any contexts which enable this operation in the ROCTX API
// domain
if(!should_wrap_functor(
_info.callback_domain_idx, _info.buffered_domain_idx, _info.operation_idx))
return;
ROCP_TRACE << "updating table entry for " << _info.name;
// 1. get the sub-table containing the function pointer in original table
// 2. get reference to function pointer in sub-table in original table
// 3. update function pointer with wrapper
auto& _table = _info.get_table(_orig);
auto& _func = _info.get_table_func(_table);
_func = _info.get_functor(_func);
}
}
}
/// Applies the per-operation copy_table() to every operation id in the sequence, in order.
template <size_t TableIdx, typename Tp, size_t OpIdx, size_t... OpIdxTail>
void
copy_table(Tp* _orig, uint64_t _tbl_instance, std::index_sequence<OpIdx, OpIdxTail...>)
{
    // head of the sequence first ...
    copy_table<TableIdx>(_orig, _tbl_instance, std::integral_constant<size_t, OpIdx>{});

    // ... then the remaining ids, left to right (comma fold; a no-op for an empty tail)
    (copy_table<TableIdx>(_orig, _tbl_instance, std::integral_constant<size_t, OpIdxTail>{}),
     ...);
}
/// Applies the per-operation update_table() to every operation id in the sequence, in order.
template <size_t TableIdx, typename Tp, size_t OpIdx, size_t... OpIdxTail>
void
update_table(Tp* _orig, std::index_sequence<OpIdx, OpIdxTail...>)
{
    // head of the sequence first ...
    update_table<TableIdx>(_orig, std::integral_constant<size_t, OpIdx>{});

    // ... then the remaining ids, left to right (comma fold; a no-op for an empty tail)
    (update_table<TableIdx>(_orig, std::integral_constant<size_t, OpIdxTail>{}), ...);
}
} // namespace
template <typename TableT>
void
copy_table(TableT* _orig, uint64_t _tbl_instance)
{
constexpr auto TableIdx = roctx_table_id_lookup<TableT>::value;
if(_orig)
copy_table<TableIdx>(
_orig, _tbl_instance, std::make_index_sequence<roctx_domain_info<TableIdx>::last>{});
}
template <typename TableT>
void
update_table(TableT* _orig, uint64_t _instv)
{
constexpr auto TableIdx = roctx_table_id_lookup<TableT>::value;
if(_orig)
{
copy_table(_orig, _instv);
update_table<TableIdx>(_orig,
std::make_index_sequence<roctx_domain_info<TableIdx>::last>{});
}
}
} // namespace range
// short aliases so the explicit instantiation of iterate_args below stays readable
using iterate_args_data_t = rocprofiler_callback_tracing_marker_api_data_t;
using iterate_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t;
// Explicitly instantiates the templates defined in this file for one table:
// range::update_table for TABLE_TYPE, and name_by_id / id_by_name / get_ids / get_names /
// iterate_args for TABLE_IDX. The macro is local to this file and undefined right after use.
#define INSTANTIATE_MARKER_TABLE_FUNC(TABLE_TYPE, TABLE_IDX) \
template void range::update_table<TABLE_TYPE>(TABLE_TYPE * _tbl, uint64_t _instv); \
template const char* name_by_id<TABLE_IDX>(uint32_t); \
template uint32_t id_by_name<TABLE_IDX>(const char*); \
template std::vector<uint32_t> get_ids<TABLE_IDX>(); \
template std::vector<const char*> get_names<TABLE_IDX>(); \
template void iterate_args<TABLE_IDX>( \
uint32_t, const iterate_args_data_t&, iterate_args_cb_t, int32_t, void*);
INSTANTIATE_MARKER_TABLE_FUNC(roctx_core_api_table_t, ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange)
#undef INSTANTIATE_MARKER_TABLE_FUNC
} // namespace marker
} // namespace rocprofiler
@@ -0,0 +1,73 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "lib/rocprofiler-sdk/marker/defines.hpp"
#include "lib/rocprofiler-sdk/marker/marker.hpp"
#include <rocprofiler-sdk/buffer_tracing.h>
#include <rocprofiler-sdk/external_correlation.h>
#include <rocprofiler-sdk/marker/table_id.h>
namespace rocprofiler
{
namespace marker
{
// Common base for the per-table domain info: only provides the type aliases for the argument,
// return-value, callback-data and buffer-record types of the marker API.
template <>
struct roctx_domain_info<ROCPROFILER_MARKER_TABLE_ID_LAST>
{
using args_type = rocprofiler_marker_api_args_t;
using retval_type = rocprofiler_marker_api_retval_t;
using callback_data_type = rocprofiler_callback_tracing_marker_api_data_t;
using buffer_data_type = rocprofiler_buffer_tracing_marker_api_record_t;
};
// Domain info for the ROCTx "core range" table: inherits the shared type aliases and adds the
// operation-id enum, the callback / buffered tracing kinds, the `none` and `last` operation
// ids, and the external correlation id request kind for this domain.
template <>
struct roctx_domain_info<ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange>
: roctx_domain_info<ROCPROFILER_MARKER_TABLE_ID_LAST>
{
using enum_type = rocprofiler_marker_core_range_api_id_t;
static constexpr auto callback_domain_idx = ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API;
static constexpr auto buffered_domain_idx = ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API;
static constexpr auto none = ROCPROFILER_MARKER_CORE_RANGE_API_ID_NONE;
static constexpr auto last = ROCPROFILER_MARKER_CORE_RANGE_API_ID_LAST;
static constexpr auto external_correlation_id_domain_idx =
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_MARKER_CORE_RANGE_API;
};
} // namespace marker
} // namespace rocprofiler
// The definitions below are only emitted when the including translation unit has
// defined ROCPROFILER_LIB_ROCPROFILER_SDK_MARKER_RANGE_MARKER_CPP_IMPL to 1. In any
// other case the #error branch at the bottom aborts the build, because this file is
// meant to be included by range_marker.cpp rather than compiled on its own.
#if defined(ROCPROFILER_LIB_ROCPROFILER_SDK_MARKER_RANGE_MARKER_CPP_IMPL) && \
ROCPROFILER_LIB_ROCPROFILER_SDK_MARKER_RANGE_MARKER_CPP_IMPL == 1
// clang-format off
// NOTE(review): the MARKER_* macros are not defined in this file; presumably they come
// from lib/rocprofiler-sdk/marker/defines.hpp -- confirm their exact expansion there.
//
// Table lookup: ties the core-range table id to the roctx_core_api_table_t type.
MARKER_API_TABLE_LOOKUP_DEFINITION(ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange, roctx_core_api_table_t)
// EVENT entry: a single operation id paired with a single table function (roctxMarkA_fn).
// The trailing `message` is presumably the name of the function's argument -- confirm.
MARKER_EVENT_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange, ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxMarkA, roctxMarkA, roctxMarkA_fn, message)
// RANGE entry: one operation id (roctxThreadRangeA) is given two table functions,
// roctxRangePushA_fn and roctxRangePop_fn, i.e. the push/pop pair is described as one range.
MARKER_RANGE_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange, ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxThreadRangeA, roctxThreadRangeA, roctxRangePushA_fn, roctxRangePop_fn, message)
// RANGE entry: one operation id (roctxProcessRangeA) is given the roctxRangeStartA_fn /
// roctxRangeStop_fn pair, i.e. the start/stop pair is described as one range.
MARKER_RANGE_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange, ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxProcessRangeA, roctxProcessRangeA, roctxRangeStartA_fn, roctxRangeStop_fn, message)
// EVENT entry: a single operation id paired with roctxGetThreadId_fn.
// The trailing `tid` is presumably the name of the function's argument -- confirm.
MARKER_EVENT_API_INFO_DEFINITION_V(ROCPROFILER_MARKER_TABLE_ID_RoctxCoreRange, ROCPROFILER_MARKER_CORE_RANGE_API_ID_roctxGetThreadId, roctxGetThreadId, roctxGetThreadId_fn, tid)
// clang-format on
#else
// Reached when the implementation macro is missing or not equal to 1.
# error \
"Do not compile this file directly. It is included by lib/rocprofiler-sdk/marker/range_marker.cpp"
#endif
@@ -991,6 +991,8 @@ rocprofiler_set_api_table(const char* name,
rocprofiler::marker::update_table(roctx_ctrl);
rocprofiler::marker::update_table(roctx_name);
rocprofiler::marker::range::update_table(roctx_core, lib_instance);
// Tracing notifications the runtime has initialized
rocprofiler::runtime_init::initialize(
ROCPROFILER_RUNTIME_INITIALIZATION_MARKER, lib_version, lib_instance);
@@ -189,6 +189,7 @@ get_buffer_tracing_names()
ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API,
ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API,
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API,
};
auto cb_name_info = buffer_name_info{};
@@ -100,6 +100,9 @@ TEST(rocprofiler_lib, api_id_names)
ROCPROFILER_MARKER_CONTROL_API_ID_LAST);
EXPECT_EQ(buffered_names.operation_names.at(ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API).size(),
ROCPROFILER_MARKER_NAME_API_ID_LAST);
EXPECT_EQ(
buffered_names.operation_names.at(ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API).size(),
ROCPROFILER_MARKER_CORE_RANGE_API_ID_LAST);
// Code object callback
EXPECT_EQ(callback_names.operation_names.at(ROCPROFILER_CALLBACK_TRACING_CODE_OBJECT).size(),
@@ -226,6 +226,8 @@ tool_tracing_buffered(rocprofiler_context_id_t context,
ROCPROFILER_MARKER_CONTROL_API_ID_LAST);
EXPECT_EQ(name_map.operation_names.at(ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API).size(),
ROCPROFILER_MARKER_NAME_API_ID_LAST);
EXPECT_EQ(name_map.operation_names.at(ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API).size(),
ROCPROFILER_MARKER_CORE_RANGE_API_ID_LAST);
auto v_records = std::vector<rocprofiler_buffer_tracing_marker_api_record_t*>{};
v_records.reserve(num_headers);
@@ -136,7 +136,8 @@ def test_otf2_data(
def roctx_mark_filter(val):
return (
None
if get_kind_name(val.kind) == "MARKER_CORE_API"
if get_kind_name(val.kind)
in ["MARKER_CORE_API", "MARKER_CORE_RANGE_API"]
and get_operation_name(val.kind, val.operation) == "roctxMarkA"
else val
)
@@ -184,7 +185,12 @@ def test_rocpd_data(
),
"marker": (
"marker_api",
("MARKER_CORE_API", "MARKER_CONTROL_API", "MARKER_NAME_API"),
(
"MARKER_CORE_API",
"MARKER_CONTROL_API",
"MARKER_NAME_API",
"MARKER_CORE_RANGE_API",
),
),
"kernel": ("kernel_dispatch", ("KERNEL_DISPATCH")),
"memory_copy": ("memory_copy", ("MEMORY_COPY")),
@@ -93,6 +93,7 @@ def test_marker_api_trace(marker_input_data):
"MARKER_CORE_API",
"MARKER_CONTROL_API",
"MARKER_NAME_API",
"MARKER_CORE_RANGE_API",
]
assert int(row["Process_Id"]) > 0
assert int(row["Thread_Id"]) == 0 or int(row["Thread_Id"]) >= int(
@@ -111,7 +112,12 @@ def test_marker_api_trace_json(json_data):
def get_kind_name(kind_id):
return data.strings.buffer_records[kind_id]["kind"]
valid_domain = ("MARKER_CORE_API", "MARKER_CONTROL_API", "MARKER_NAME_API")
valid_domain = (
"MARKER_CORE_API",
"MARKER_CONTROL_API",
"MARKER_NAME_API",
"MARKER_CORE_RANGE_API",
)
marker_data = data.buffer_records.marker_api
@@ -69,6 +69,7 @@ def test_marker_api_trace(marker_input_data):
for row in marker_input_data:
assert row["Domain"] in [
"MARKER_CORE_API",
"MARKER_CORE_RANGE_API",
"MARKER_CONTROL_API",
"MARKER_NAME_API",
]
@@ -89,7 +90,12 @@ def test_marker_api_trace_json(json_data):
def get_kind_name(kind_id):
return data.strings.buffer_records[kind_id]["kind"]
valid_domain = ("MARKER_CORE_API", "MARKER_CONTROL_API", "MARKER_NAME_API")
valid_domain = (
"MARKER_CORE_API",
"MARKER_CONTROL_API",
"MARKER_NAME_API",
"MARKER_CORE_RANGE_API",
)
marker_data = data.buffer_records.marker_api
@@ -38,7 +38,12 @@ def test_marker_api_trace(json_data):
return itr.value
return None
valid_domain = ("MARKER_CORE_API", "MARKER_CONTROL_API", "MARKER_NAME_API")
valid_domain = (
"MARKER_CORE_API",
"MARKER_CONTROL_API",
"MARKER_NAME_API",
"MARKER_CORE_RANGE_API",
)
buffer_records = data["buffer_records"]
marker_data = buffer_records["marker_api"]
@@ -102,15 +102,15 @@ def test_api_trace(
hip_correlation_ids.append(cid)
for row in marker_input_data:
assert row["Domain"] in [
"MARKER_CORE_API",
]
assert row["Domain"] in ["MARKER_CORE_API", "MARKER_CORE_RANGE_API"]
assert int(row["Process_Id"]) > 0
assert int(row["Thread_Id"]) == 0 or int(row["Thread_Id"]) >= int(
row["Process_Id"]
)
assert int(row["End_Timestamp"]) >= int(row["Start_Timestamp"])
functions.append(row["Function"])
cid = int(row["Correlation_Id"])
# Correlation ID will be identical for MARKER_CORE_API and MARKER_CORE_RANGE_API
marker_correlation_ids.append(cid)
def get_sorted_unique(inp):
@@ -218,6 +218,7 @@ def test_api_trace_json(json_data):
valid_marker_domain = [
"MARKER_CORE_API",
"MARKER_CORE_RANGE_API",
]
def get_operation_name(kind_id, op_id):
@@ -253,6 +254,7 @@ def test_api_trace_json(json_data):
assert metadata["pid"] > 0
assert api["thread_id"] == 0 or api["thread_id"] >= metadata["pid"]
assert api["end_timestamp"] >= api["start_timestamp"]
functions.append(get_operation_name(api["kind"], api["operation"]))
correlation_ids.append(api["correlation_id"]["internal"])
correlation_ids = sorted(list(set(correlation_ids)))
@@ -287,6 +287,7 @@ def test_marker_api_trace(marker_input_data):
"MARKER_CORE_API",
"MARKER_CONTROL_API",
"MARKER_NAME_API",
"MARKER_CORE_RANGE_API",
]
assert int(row["Process_Id"]) > 0
assert int(row["Thread_Id"]) == 0 or int(row["Thread_Id"]) >= int(
@@ -305,7 +306,12 @@ def test_marker_api_trace_json(json_data):
def get_kind_name(kind_id):
return data["strings"]["buffer_records"][kind_id]["kind"]
valid_domain = ("MARKER_CORE_API", "MARKER_CONTROL_API", "MARKER_NAME_API")
valid_domain = (
"MARKER_CORE_API",
"MARKER_CONTROL_API",
"MARKER_NAME_API",
"MARKER_CORE_RANGE_API",
)
buffer_records = data["buffer_records"]
marker_data = buffer_records["marker_api"]
+21 -2
مشاهده پرونده
@@ -771,7 +771,8 @@ tool_tracing_callback(rocprofiler_callback_tracing_record_t record,
}
else if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API ||
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_NAME_API ||
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API)
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API ||
record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API)
{
auto* data = static_cast<rocprofiler_callback_tracing_marker_api_data_t*>(record.payload);
auto args = callback_arg_array_t{};
@@ -970,7 +971,8 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_API ||
header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API ||
header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API)
header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API ||
header->kind == ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API)
{
auto* record =
static_cast<rocprofiler_buffer_tracing_marker_api_record_t*>(header->payload);
@@ -1428,6 +1430,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
nullptr),
"marker core api tracing service configure");
// ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
// marker_api_callback_ctx,
// ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_RANGE_API,
// nullptr,
// 0,
// tool_tracing_callback,
// nullptr),
// "marker core api tracing service configure");
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
marker_api_callback_ctx,
ROCPROFILER_CALLBACK_TRACING_MARKER_CONTROL_API,
@@ -1772,6 +1783,14 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
marker_api_buffered_buffer),
"buffer tracing service configure");
// ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service(
// marker_api_buffered_ctx,
// ROCPROFILER_BUFFER_TRACING_MARKER_CORE_RANGE_API,
// nullptr,
// 0,
// marker_api_buffered_buffer),
// "buffer tracing service configure");
ROCPROFILER_CALL(
rocprofiler_configure_buffer_tracing_service(marker_api_buffered_ctx,
ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API,