From 29b25a3ef5b7ffa293f044eb2a14b5b7d998a048 Mon Sep 17 00:00:00 2001 From: Ammar ELWazir Date: Thu, 20 Apr 2023 13:29:02 +0000 Subject: [PATCH] Adding Async output for API Tracing If rocprofiler_set_api_trace_sync_callback is not called by the API client then it will be automatically saved in the buffer and it will be in async to the user to see the API trace data, sample for async api tracing is added to samples/tracer Change-Id: I01266a12b0eec172fdcffd7f04c89c2fe96174bb --- samples/CMakeLists.txt | 9 ++++ samples/tracer/sample_async.cpp | 77 ++++++++++++++++++++++++++++++ src/core/session/filter.cpp | 11 ++++- src/core/session/filter.h | 2 + src/core/session/session.cpp | 4 +- src/core/session/tracer/tracer.cpp | 20 ++++++-- src/core/session/tracer/tracer.h | 1 + 7 files changed, 116 insertions(+), 8 deletions(-) create mode 100644 samples/tracer/sample_async.cpp diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 5694540127..a0c12b6b29 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -117,6 +117,15 @@ target_link_options(tracer_hip_hsa PRIVATE "-Wl,--build-id=md5") add_dependencies(samples tracer_hip_hsa) install(TARGETS tracer_hip_hsa RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples COMPONENT samples) +## Build HIP/HSA Trace with async output api trace data Sample +set_source_files_properties(tracer/sample_async.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) +hip_add_executable(tracer_hip_hsa_async tracer/sample_async.cpp ${ROCPROFILER_UTIL_SRC_FILES}) +target_include_directories(tracer_hip_hsa_async PRIVATE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/inc ${CMAKE_CURRENT_SOURCE_DIR}/common) +target_link_libraries(tracer_hip_hsa_async PRIVATE ${ROCPROFILER_TARGET} amd_comgr) +target_link_options(tracer_hip_hsa_async PRIVATE "-Wl,--build-id=md5") +add_dependencies(samples tracer_hip_hsa_async) +install(TARGETS tracer_hip_hsa_async RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples COMPONENT samples) + # ############################################################################################################################################ # PC Sampling Samples # ############################################################################################################################################ diff --git a/samples/tracer/sample_async.cpp b/samples/tracer/sample_async.cpp new file mode 100644 index 0000000000..d769395cb8 --- /dev/null +++ b/samples/tracer/sample_async.cpp @@ -0,0 +1,77 @@ +#include "../common/common.h" + +int main(int argc, char** argv) { + int* gpuMem; + prepare(); + // Initialize the tools + CHECK_ROCPROFILER(rocprofiler_initialize()); + + // Creating the session with given replay mode + rocprofiler_session_id_t session_id; + CHECK_ROCPROFILER(rocprofiler_create_session(ROCPROFILER_KERNEL_REPLAY_MODE, &session_id)); + + // Creating Output Buffer for the data + rocprofiler_buffer_id_t buffer_id; + CHECK_ROCPROFILER(rocprofiler_create_buffer( + session_id, + [](const rocprofiler_record_header_t* record, const rocprofiler_record_header_t* end_record, + rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) { + WriteBufferRecords(record, end_record, session_id, buffer_id); + }, + 0x9999, &buffer_id)); + + // Tracing Filter + std::vector apis_requested; + apis_requested.emplace_back(ACTIVITY_DOMAIN_HIP_API); + apis_requested.emplace_back(ACTIVITY_DOMAIN_HIP_OPS); + apis_requested.emplace_back(ACTIVITY_DOMAIN_HSA_API); + apis_requested.emplace_back(ACTIVITY_DOMAIN_HSA_OPS); + apis_requested.emplace_back(ACTIVITY_DOMAIN_ROCTX); + rocprofiler_filter_id_t api_tracing_filter_id; + CHECK_ROCPROFILER(rocprofiler_create_filter( + session_id, ROCPROFILER_API_TRACE, rocprofiler_filter_data_t{&apis_requested[0]}, + apis_requested.size(), &api_tracing_filter_id, rocprofiler_filter_property_t{})); + CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, api_tracing_filter_id, buffer_id)); + + // Kernel Tracing + rocprofiler_filter_id_t kernel_tracing_filter_id; + CHECK_ROCPROFILER(rocprofiler_create_filter(session_id, ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION, + rocprofiler_filter_data_t{}, 0, &kernel_tracing_filter_id, + rocprofiler_filter_property_t{})); + CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, kernel_tracing_filter_id, buffer_id)); + + // Normal HIP Calls won't be traced + hipDeviceProp_t devProp; + HIP_CALL(hipGetDeviceProperties(&devProp, 0)); + HIP_CALL(hipMalloc((void**)&gpuMem, 1 * sizeof(int))); + // KernelA and KernelB won't be traced + kernelCalls('A'); + kernelCalls('B'); + + // Activating Profiling Session to profile whatever kernel launches occurs up + // till the next terminate session + CHECK_ROCPROFILER(rocprofiler_start_session(session_id)); + + // KernelC, KernelD, KernelE and KernelF to be traced as part of the session + kernelCalls('C'); + kernelCalls('D'); + kernelCalls('E'); + kernelCalls('F'); + // Normal HIP Calls that will be traced + HIP_CALL(hipFree(gpuMem)); + + // Deactivating session + CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id)); + + // Manual Flush user buffer request + CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id)); + + // Destroy sessions + CHECK_ROCPROFILER(rocprofiler_destroy_session(session_id)); + + // Destroy all profiling related objects(User buffer, sessions, filters, + // etc..) + CHECK_ROCPROFILER(rocprofiler_finalize()); + + return 0; +} \ No newline at end of file diff --git a/src/core/session/filter.cpp b/src/core/session/filter.cpp index 919c5b44ba..741012ebd4 100644 --- a/src/core/session/filter.cpp +++ b/src/core/session/filter.cpp @@ -233,9 +233,16 @@ std::variant, uint32_t*> Filter::GetProperty( return property; } -void Filter::SetCallback(rocprofiler_sync_callback_t& callback) { callback_ = callback; } +void Filter::SetCallback(rocprofiler_sync_callback_t& callback) { + callback_ = callback; + has_sync_callback_ = true; +} -rocprofiler_sync_callback_t& Filter::GetCallback() { return callback_; } +bool Filter::HasCallback() { return has_sync_callback_; } + +rocprofiler_sync_callback_t& Filter::GetCallback() { + return callback_; +} size_t Filter::GetPropertiesCount(rocprofiler_filter_property_kind_t kind) { switch (kind) { diff --git a/src/core/session/filter.h b/src/core/session/filter.h index 31a4be42cc..dd36d03ae2 100644 --- a/src/core/session/filter.h +++ b/src/core/session/filter.h @@ -50,6 +50,7 @@ class Filter { std::vector GetAttParametersData(); void SetCallback(rocprofiler_sync_callback_t& callback); rocprofiler_sync_callback_t& GetCallback(); + bool HasCallback(); void SetProperty(rocprofiler_filter_property_t property); std::variant, uint32_t*> GetProperty( @@ -75,6 +76,7 @@ class Filter { std::vector att_parameters_; // ATT Parameters rocprofiler_counters_sampler_parameters_t counters_sampler_parameters_; // sampled counters parameters + bool has_sync_callback_{false}; rocprofiler_sync_callback_t callback_; }; diff --git a/src/core/session/session.cpp b/src/core/session/session.cpp index c27701ec61..9b01951913 100644 --- a/src/core/session/session.cpp +++ b/src/core/session/session.cpp @@ -131,7 +131,9 @@ void Session::Start() { GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetTraceData(); if (!tracer_started_.load(std::memory_order_release)) { tracer_ = new tracer::Tracer( - session_id_, GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetCallback(), + session_id_, (GetFilter( + GetFilterIdWithKind(ROCPROFILER_API_TRACE))->HasCallback() ? GetFilter( + GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetCallback() : nullptr), GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetBufferId(), domains); tracer_started_.exchange(true, std::memory_order_release); } diff --git a/src/core/session/tracer/tracer.cpp b/src/core/session/tracer/tracer.cpp index 9907f61358..d922d7170b 100644 --- a/src/core/session/tracer/tracer.cpp +++ b/src/core/session/tracer/tracer.cpp @@ -39,9 +39,7 @@ Tracer::Tracer(rocprofiler_session_id_t session_id, rocprofiler_sync_callback_t : domains_(domains), callback_(callback), buffer_id_(buffer_id), session_id_(session_id) { assert(!is_active_.load(std::memory_order_release) && "Error: The tracer was initialized!"); std::lock_guard lock(tracer_lock_); - callback_data_ = api_callback_data_t{callback, session_id}; - is_active_.exchange(true, std::memory_order_release); } @@ -406,12 +404,20 @@ void Tracer::InitRoctracer( switch (domain.first) { case ACTIVITY_DOMAIN_ROCTX: { assert(!domain.second && "Error: ROCTX API can't be filtered!"); - roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, api_callback, &callback_data_); + if(callback_data_.user_sync_callback) + roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, api_callback, &callback_data_); + else + roctracer_enable_domain_activity(ACTIVITY_DOMAIN_ROCTX, + session_buffer_id_t{session_id_, buffer_id_}); break; } case ACTIVITY_DOMAIN_HSA_API: { if (!domain.second) { - roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, api_callback, &callback_data_); + if(callback_data_.user_sync_callback) + roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, api_callback, &callback_data_); + else + roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_API, + session_buffer_id_t{session_id_, buffer_id_}); } else { assert(!api_filter_data_vector.empty() && "Error: HSA API calls filter data is empty and domain " @@ -421,7 +427,11 @@ void Tracer::InitRoctracer( } case ACTIVITY_DOMAIN_HIP_API: { if (!domain.second) { - roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, api_callback, &callback_data_); + if(callback_data_.user_sync_callback) + roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, api_callback, &callback_data_); + else + roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API, + session_buffer_id_t{session_id_, buffer_id_}); } else { assert(!api_filter_data_vector.empty() && "Error: HIP API calls filter data is empty and domain " diff --git a/src/core/session/tracer/tracer.h b/src/core/session/tracer/tracer.h index c698959e6b..fe63b7b412 100644 --- a/src/core/session/tracer/tracer.h +++ b/src/core/session/tracer/tracer.h @@ -91,6 +91,7 @@ class Tracer { roctx_report_activity_; std::vector domains_; + bool is_sync_; rocprofiler_sync_callback_t callback_; rocprofiler_buffer_id_t buffer_id_; rocprofiler_session_id_t session_id_;