diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 5694540127..a0c12b6b29 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -117,6 +117,15 @@ target_link_options(tracer_hip_hsa PRIVATE "-Wl,--build-id=md5") add_dependencies(samples tracer_hip_hsa) install(TARGETS tracer_hip_hsa RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples COMPONENT samples) +## Build HIP/HSA Trace with async output api trace data Sample +set_source_files_properties(tracer/sample_async.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) +hip_add_executable(tracer_hip_hsa_async tracer/sample_async.cpp ${ROCPROFILER_UTIL_SRC_FILES}) +target_include_directories(tracer_hip_hsa_async PRIVATE ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/inc ${CMAKE_CURRENT_SOURCE_DIR}/common) +target_link_libraries(tracer_hip_hsa_async PRIVATE ${ROCPROFILER_TARGET} amd_comgr) +target_link_options(tracer_hip_hsa_async PRIVATE "-Wl,--build-id=md5") +add_dependencies(samples tracer_hip_hsa_async) +install(TARGETS tracer_hip_hsa_async RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples COMPONENT samples) + # ############################################################################################################################################ # PC Sampling Samples # ############################################################################################################################################ diff --git a/samples/tracer/sample_async.cpp b/samples/tracer/sample_async.cpp new file mode 100644 index 0000000000..d769395cb8 --- /dev/null +++ b/samples/tracer/sample_async.cpp @@ -0,0 +1,77 @@ +#include "../common/common.h" + +int main(int argc, char** argv) { + int* gpuMem; + prepare(); + // Initialize the tools + CHECK_ROCPROFILER(rocprofiler_initialize()); + + // Creating the session with given replay mode + rocprofiler_session_id_t session_id; + CHECK_ROCPROFILER(rocprofiler_create_session(ROCPROFILER_KERNEL_REPLAY_MODE, &session_id)); + + // Creating Output Buffer for the data + rocprofiler_buffer_id_t buffer_id; + CHECK_ROCPROFILER(rocprofiler_create_buffer( + session_id, + [](const rocprofiler_record_header_t* record, const rocprofiler_record_header_t* end_record, + rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) { + WriteBufferRecords(record, end_record, session_id, buffer_id); + }, + 0x9999, &buffer_id)); + + // Tracing Filter + std::vector apis_requested; + apis_requested.emplace_back(ACTIVITY_DOMAIN_HIP_API); + apis_requested.emplace_back(ACTIVITY_DOMAIN_HIP_OPS); + apis_requested.emplace_back(ACTIVITY_DOMAIN_HSA_API); + apis_requested.emplace_back(ACTIVITY_DOMAIN_HSA_OPS); + apis_requested.emplace_back(ACTIVITY_DOMAIN_ROCTX); + rocprofiler_filter_id_t api_tracing_filter_id; + CHECK_ROCPROFILER(rocprofiler_create_filter( + session_id, ROCPROFILER_API_TRACE, rocprofiler_filter_data_t{&apis_requested[0]}, + apis_requested.size(), &api_tracing_filter_id, rocprofiler_filter_property_t{})); + CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, api_tracing_filter_id, buffer_id)); + + // Kernel Tracing + rocprofiler_filter_id_t kernel_tracing_filter_id; + CHECK_ROCPROFILER(rocprofiler_create_filter(session_id, ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION, + rocprofiler_filter_data_t{}, 0, &kernel_tracing_filter_id, + rocprofiler_filter_property_t{})); + CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, kernel_tracing_filter_id, buffer_id)); + + // Normal HIP Calls won't be traced + hipDeviceProp_t devProp; + HIP_CALL(hipGetDeviceProperties(&devProp, 0)); + HIP_CALL(hipMalloc((void**)&gpuMem, 1 * sizeof(int))); + // KernelA and KernelB won't be traced + kernelCalls('A'); + kernelCalls('B'); + + // Activating Profiling Session to profile whatever kernel launches occurs up + // till the next terminate session + CHECK_ROCPROFILER(rocprofiler_start_session(session_id)); + + // KernelC, KernelD, KernelE and KernelF to be traced as part of the session + kernelCalls('C'); + kernelCalls('D'); + kernelCalls('E'); + kernelCalls('F'); + // Normal HIP Calls that will be traced + HIP_CALL(hipFree(gpuMem)); + + // Deactivating session + CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id)); + + // Manual Flush user buffer request + CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id)); + + // Destroy sessions + CHECK_ROCPROFILER(rocprofiler_destroy_session(session_id)); + + // Destroy all profiling related objects(User buffer, sessions, filters, + // etc..) + CHECK_ROCPROFILER(rocprofiler_finalize()); + + return 0; +} \ No newline at end of file diff --git a/src/core/session/filter.cpp b/src/core/session/filter.cpp index 919c5b44ba..741012ebd4 100644 --- a/src/core/session/filter.cpp +++ b/src/core/session/filter.cpp @@ -233,9 +233,16 @@ std::variant, uint32_t*> Filter::GetProperty( return property; } -void Filter::SetCallback(rocprofiler_sync_callback_t& callback) { callback_ = callback; } +void Filter::SetCallback(rocprofiler_sync_callback_t& callback) { + callback_ = callback; + has_sync_callback_ = true; +} -rocprofiler_sync_callback_t& Filter::GetCallback() { return callback_; } +bool Filter::HasCallback() { return has_sync_callback_; } + +rocprofiler_sync_callback_t& Filter::GetCallback() { + return callback_; +} size_t Filter::GetPropertiesCount(rocprofiler_filter_property_kind_t kind) { switch (kind) { diff --git a/src/core/session/filter.h b/src/core/session/filter.h index 31a4be42cc..dd36d03ae2 100644 --- a/src/core/session/filter.h +++ b/src/core/session/filter.h @@ -50,6 +50,7 @@ class Filter { std::vector GetAttParametersData(); void SetCallback(rocprofiler_sync_callback_t& callback); rocprofiler_sync_callback_t& GetCallback(); + bool HasCallback(); void SetProperty(rocprofiler_filter_property_t property); std::variant, uint32_t*> GetProperty( @@ -75,6 +76,7 @@ class Filter { std::vector att_parameters_; // ATT Parameters rocprofiler_counters_sampler_parameters_t counters_sampler_parameters_; // sampled counters parameters + bool has_sync_callback_{false}; rocprofiler_sync_callback_t callback_; }; diff --git a/src/core/session/session.cpp b/src/core/session/session.cpp index c27701ec61..9b01951913 100644 --- a/src/core/session/session.cpp +++ b/src/core/session/session.cpp @@ -131,7 +131,9 @@ void Session::Start() { GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetTraceData(); if (!tracer_started_.load(std::memory_order_release)) { tracer_ = new tracer::Tracer( - session_id_, GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetCallback(), + session_id_, (GetFilter( + GetFilterIdWithKind(ROCPROFILER_API_TRACE))->HasCallback() ? GetFilter( + GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetCallback() : nullptr), GetFilter(GetFilterIdWithKind(ROCPROFILER_API_TRACE))->GetBufferId(), domains); tracer_started_.exchange(true, std::memory_order_release); } diff --git a/src/core/session/tracer/tracer.cpp b/src/core/session/tracer/tracer.cpp index 9907f61358..d922d7170b 100644 --- a/src/core/session/tracer/tracer.cpp +++ b/src/core/session/tracer/tracer.cpp @@ -39,9 +39,7 @@ Tracer::Tracer(rocprofiler_session_id_t session_id, rocprofiler_sync_callback_t : domains_(domains), callback_(callback), buffer_id_(buffer_id), session_id_(session_id) { assert(!is_active_.load(std::memory_order_release) && "Error: The tracer was initialized!"); std::lock_guard lock(tracer_lock_); - callback_data_ = api_callback_data_t{callback, session_id}; - is_active_.exchange(true, std::memory_order_release); } @@ -406,12 +404,20 @@ void Tracer::InitRoctracer( switch (domain.first) { case ACTIVITY_DOMAIN_ROCTX: { assert(!domain.second && "Error: ROCTX API can't be filtered!"); - roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, api_callback, &callback_data_); + if(callback_data_.user_sync_callback) + roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, api_callback, &callback_data_); + else + roctracer_enable_domain_activity(ACTIVITY_DOMAIN_ROCTX, + session_buffer_id_t{session_id_, buffer_id_}); break; } case ACTIVITY_DOMAIN_HSA_API: { if (!domain.second) { - roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, api_callback, &callback_data_); + if(callback_data_.user_sync_callback) + roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, api_callback, &callback_data_); + else + roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_API, + session_buffer_id_t{session_id_, buffer_id_}); } else { assert(!api_filter_data_vector.empty() && "Error: HSA API calls filter data is empty and domain " @@ -421,7 +427,11 @@ void Tracer::InitRoctracer( } case ACTIVITY_DOMAIN_HIP_API: { if (!domain.second) { - roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, api_callback, &callback_data_); + if(callback_data_.user_sync_callback) + roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, api_callback, &callback_data_); + else + roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API, + session_buffer_id_t{session_id_, buffer_id_}); } else { assert(!api_filter_data_vector.empty() && "Error: HIP API calls filter data is empty and domain " diff --git a/src/core/session/tracer/tracer.h b/src/core/session/tracer/tracer.h index c698959e6b..fe63b7b412 100644 --- a/src/core/session/tracer/tracer.h +++ b/src/core/session/tracer/tracer.h @@ -91,6 +91,7 @@ class Tracer { roctx_report_activity_; std::vector domains_; + bool is_sync_; rocprofiler_sync_callback_t callback_; rocprofiler_buffer_id_t buffer_id_; rocprofiler_session_id_t session_id_;